python中module内全局变量的局部性

发布时间 2023-08-10 18:04:11作者: tsecer

问题

python中的模块主要通过import实现。一个常见的典型场景是在A模块import模块B的func,并调用B模块的func;而B模块的func又引用了所在模块B的gvar变量。这个过程中A其实只import了模块B的func函数,在A模块调用B模块的func时,func函数能不忘初心,找到自己当初定义模块中的gvar吗?

当然,这个问题很容易验证,这里讨论的是python内部的实现机制。

MAKE_FUNCTION

在执行函数定义(MAKE_FUNCTION)时,函数定义结构会把模块的global保存在新定义结构中。


        TARGET(MAKE_FUNCTION) {
            PyObject *qualname = POP();
            PyObject *codeobj = POP();
            PyFunctionObject *func = (PyFunctionObject *)
                PyFunction_NewWithQualName(codeobj, f->f_globals, qualname);

///
PyObject *
PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname)
{
    PyFunctionObject *op;
    PyObject *doc, *consts, *module;
    static PyObject *__name__ = NULL;

    if (__name__ == NULL) {
        __name__ = PyUnicode_InternFromString("__name__");
        if (__name__ == NULL)
            return NULL;
    }

    op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
    if (op == NULL)
        return NULL;

    op->func_weakreflist = NULL;
    Py_INCREF(code);
    op->func_code = code;
    Py_INCREF(globals);
    op->func_globals = globals;
    ///...
}

CALL_FUNCTION

在调用函数中,从函数定义中内容找到func_globals字段,并传递给函数栈帧执行。

#define PyFunction_GET_GLOBALS(func) \
	(((PyFunctionObject *)func) -> func_globals)
	
static PyObject *
fast_function(PyObject *func, PyObject **stack,
              Py_ssize_t nargs, PyObject *kwnames)
{
    PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
    PyObject *globals = PyFunction_GET_GLOBALS(func);
    PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
    PyObject *kwdefs, *closure, *name, *qualname;
    PyObject **d;
    Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
    Py_ssize_t nd;

    assert(PyFunction_Check(func));
    assert(nargs >= 0);
    assert(kwnames == NULL || PyTuple_CheckExact(kwnames));
    assert((nargs == 0 && nkwargs == 0) || stack != NULL);
    /* kwnames must only contains str strings, no subclass, and all keys must
       be unique */

    PCALL(PCALL_FUNCTION);
    PCALL(PCALL_FAST_FUNCTION);

    if (co->co_kwonlyargcount == 0 && nkwargs == 0 &&
        co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE))
    {
        if (argdefs == NULL && co->co_argcount == nargs) {
            return _PyFunction_FastCall(co, stack, nargs, globals);
        }
        else if (nargs == 0 && argdefs != NULL
                 && co->co_argcount == Py_SIZE(argdefs)) {
            /* function called with no arguments, but all parameters have
               a default value: use default values as arguments .*/
            stack = &PyTuple_GET_ITEM(argdefs, 0);
            return _PyFunction_FastCall(co, stack, Py_SIZE(argdefs), globals);
        }
    }

    kwdefs = PyFunction_GET_KW_DEFAULTS(func);
    closure = PyFunction_GET_CLOSURE(func);
    name = ((PyFunctionObject *)func) -> func_name;
    qualname = ((PyFunctionObject *)func) -> func_qualname;

    if (argdefs != NULL) {
        d = &PyTuple_GET_ITEM(argdefs, 0);
        nd = Py_SIZE(argdefs);
    }
    else {
        d = NULL;
        nd = 0;
    }
    return _PyEval_EvalCodeWithName((PyObject*)co, globals, (PyObject *)NULL,
                                    stack, nargs,
                                    nkwargs ? &PyTuple_GET_ITEM(kwnames, 0) : NULL,
                                    stack + nargs,
                                    nkwargs, 1,
                                    d, (int)nd, kwdefs,
                                    closure, name, qualname);
}

原始global

前面的MAKE_FUNCTION和CALL_FUNCTION都是从函数定义的func_globals取数据,貌似陷入了一个“鸡生蛋,蛋生鸡”的怪圈。那最原始的global是从哪里来的呢?

在创建一个新模块时,会通过module_dict_for_exec函数为模块创建一个新的global字典。在执行PyEval_EvalCode函数的时候,传入的globals和locals都是这个新创建的模块字典。

PyObject*
PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
                              PyObject *cpathname)
{
    PyObject *d, *external, *res;
    PyInterpreterState *interp = PyThreadState_GET()->interp;
    _Py_IDENTIFIER(_fix_up_module);

    d = module_dict_for_exec(name);
    if (d == NULL) {
        return NULL;
    }

    if (pathname == NULL) {
        pathname = ((PyCodeObject *)co)->co_filename;
    }
    external = PyObject_GetAttrString(interp->importlib, "_bootstrap_external");
    if (external == NULL)
        return NULL;
    res = _PyObject_CallMethodIdObjArgs(external,
                                        &PyId__fix_up_module,
                                        d, name, pathname, cpathname, NULL);
    Py_DECREF(external);
    if (res != NULL) {
        Py_DECREF(res);
        res = exec_code_in_module(name, d, co);
    }
    return res;
}

static PyObject *
exec_code_in_module(PyObject *name, PyObject *module_dict, PyObject *code_object)
{
    PyObject *modules = PyImport_GetModuleDict();
    PyObject *v, *m;

    v = PyEval_EvalCode(code_object, module_dict, module_dict);
    if (v == NULL) {
        remove_module(name);
        return NULL;
    }
    Py_DECREF(v);

    if ((m = PyDict_GetItem(modules, name)) == NULL) {
        PyErr_Format(PyExc_ImportError,
                     "Loaded module %R not found in sys.modules",
                     name);
        return NULL;
    }

    Py_INCREF(m);

    return m;
}

PyObject *
PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
{
    return PyEval_EvalCodeEx(co,
                      globals, locals,
                      (PyObject **)NULL, 0,
                      (PyObject **)NULL, 0,
                      (PyObject **)NULL, 0,
                      NULL, NULL);
}

import *

从下面的代码可以明显的看到,这里的流程是将指定module的__all__或者__dict__内容导出到当前模块的_local_(注意:不是globals列表)列表中。这也意味着:如果是在一个函数内执行import,那么import的内容只会存在于函数内而不会存在于所在模块中。

///@file: Python-3.6.0\Python\ceval.c
PyObject *
_PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
{
///...
        TARGET(IMPORT_STAR) {
            PyObject *from = POP(), *locals;
            int err;
            if (PyFrame_FastToLocalsWithError(f) < 0)
                goto error;

            locals = f->f_locals;
            if (locals == NULL) {
                PyErr_SetString(PyExc_SystemError,
                    "no locals found during 'import *'");
                goto error;
            }
            err = import_all_from(locals, from);
            PyFrame_LocalsToFast(f, 0);
            Py_DECREF(from);
            if (err != 0)
                goto error;
            DISPATCH();
        }
        ///...
}


static int
import_all_from(PyObject *locals, PyObject *v)
{
    _Py_IDENTIFIER(__all__);
    _Py_IDENTIFIER(__dict__);
    PyObject *all = _PyObject_GetAttrId(v, &PyId___all__);
    PyObject *dict, *name, *value;
    int skip_leading_underscores = 0;
    int pos, err;

    if (all == NULL) {
        if (!PyErr_ExceptionMatches(PyExc_AttributeError))
            return -1; /* Unexpected error */
        PyErr_Clear();
        dict = _PyObject_GetAttrId(v, &PyId___dict__);
        if (dict == NULL) {
            if (!PyErr_ExceptionMatches(PyExc_AttributeError))
                return -1;
            PyErr_SetString(PyExc_ImportError,
            "from-import-* object has no __dict__ and no __all__");
            return -1;
        }
        all = PyMapping_Keys(dict);
        Py_DECREF(dict);
        if (all == NULL)
            return -1;
        skip_leading_underscores = 1;
    }

    for (pos = 0, err = 0; ; pos++) {
        name = PySequence_GetItem(all, pos);
        if (name == NULL) {
            if (!PyErr_ExceptionMatches(PyExc_IndexError))
                err = -1;
            else
                PyErr_Clear();
            break;
        }
        if (skip_leading_underscores &&
            PyUnicode_Check(name) &&
            PyUnicode_READY(name) != -1 &&
            PyUnicode_READ_CHAR(name, 0) == '_')
        {
            Py_DECREF(name);
            continue;
        }
        value = PyObject_GetAttr(v, name);
        if (value == NULL)
            err = -1;
        else if (PyDict_CheckExact(locals))
            err = PyDict_SetItem(locals, name, value);
        else
            err = PyObject_SetItem(locals, name, value);
        Py_DECREF(name);
        Py_XDECREF(value);
        if (err != 0)
            break;
    }
    Py_DECREF(all);
    return err;
}

可以看到,在导入的时候使用了一个不太常见的__all__变量,这个变量在stackoverflow的一个帖子里有一个详细的说明。大致来说,这个关键字就是为了给 import *这种语句使用的,这样模块就可以自己确定哪些内容希望导出而哪些不希望导出(当然,只是在使用import *的时候)。

而且,这个表达式在一些NameSpace模块中更加常见。随便找一下,在python工程中自带的Python-3.6.0\Lib\dbm_init_.py文件中就包含了一个选择性导出的__all__变量,控制这个模块的导出内容。

__all__ = ['open', 'whichdb', 'error']
defining what symbols in a module will be exported when from <module> import * is used on the module.

For example, the following code in a foo.py explicitly exports the symbols bar and baz:

__all__ = ['bar', 'baz']

waz = 5
bar = 10
def baz(): return 'baz'
These symbols can then be imported like so:

from foo import *

print(bar)
print(baz)

# The following will trigger an exception, as "waz" is not exported by the module
print(waz)
If the __all__ above is commented out, this code will then execute to completion, as the default behaviour of import * is to import all symbols that do not begin with an underscore, from the given namespace.

Reference: https://docs.python.org/tutorial/modules.html#importing-from-a-package

NOTE: __all__ affects the from <module> import * behavior only. Members that are not mentioned in __all__ are still accessible from outside the module and can be imported with from <module> import <member>.

栗子

经过这么多的分析,终于得到了一个简单的结论:

在python执行定义函数(MAKE_FUNCTION)时,会在函数定义中保存一个指向所在模块global字典的指针(就好像closure包含所有额外引用列表),执行函数(CALL_FUNCTION)时,使用的栈帧中同样会传入这个定义时绑定的global指针。从而在模块外调用该模块内定义函数时,就好像在函数内调用一样(可以访问到该模块的所有global变量)。

tsecer@harry: cat tsecer.py 
import harry

gvar = "tsecer"

harry.hello()

print(gvar)

tsecer@harry: cat harry.py 
def hello():
    print(gvar)

"""
Traceback (most recent call last):
  File "tsecer.py", line 1, in <module>
    import harry
  File "/home/harry/py.main.sub.global/harry.py", line 4, in <module>
    hello()
  File "/home/harry/py.main.sub.global/harry.py", line 2, in hello
    print(gvar)
NameError: name 'gvar' is not defined
"""
#hello()

gvar = "harry.ac"
hello()

gvar = "harry.bc"
hello()
tsecer@harry: python3 tsecer.py  
harry.ac
harry.bc
harry.bc
tsecer
tsecer@harry: