dotgnu如何实现C#的typeof操作符

发布时间 2023-06-08 20:32:17作者: tsecer

typeof

对于传统的C/C++程序员来说,反射(reflection)始终是一个新奇(exotic)的存在,而typeof是语言内置的获取一个类型的方法。当然获得一个特定类型的typeof并不特殊,但是这个typeof的返回值是一个Object,并且是一个Type类型,这就意味着可以通过一个变量来保存任意的类型信息,从而可以在运行时决定一个变量是什么类型,进而基于类型进行很多额外的操作:例如枚举方法列表,动态创建对象,获得customattr等。

IL

由于typeof是CSharp的关键字,所以在编译的时候可以由编译器生成约定的IL调用,从这个源代码实现看:如果有这个关键字,会生成一个GetTypeFromHandle函数的调用。

///@file: pnet\cscc\csharp\cs_misc.tc
/*
 * Generate value code for the "typeof" operator.
 *
 * The emitted sequence is an "ldtoken" on the operand type followed by
 * a call to System.Type::GetTypeFromHandle(RuntimeTypeHandle).  The
 * value is reported to the caller as an object reference.
 */
ILNode_GenValue(ILNode_TypeOf)
{
	/* Emit IL_OP_LDTOKEN for the type named in the typeof expression */
	ILGenTypeToken(info, IL_OP_LDTOKEN, node->type);
	/* NOTE(review): presumably records the one stack slot that the
	   token occupies -- confirm against ILGenAdjust */
	ILGenAdjust(info, 1);
	/* Call the library routine that turns the handle into a Type */
	ILGenCallByName(info,
			"class [.library]System.Type "
				"[.library]System.Type::GetTypeFromHandle"
				"(valuetype [.library]System.RuntimeTypeHandle)");
	return ILMachineType_ObjectRef;
}

builtin

和python一样,CSharp也通过C语言内置实现了一些CSharp的标准库方法,这其中就包括了这里用到的System.Type中的GetTypeFromHandle、GetTypeHandle、GetType方法。

/*
 * Structure of an "internalcall" method table entry.
 *
 * One entry describes a single natively implemented method.  The
 * "marshal" member only exists in builds where HAVE_LIBFFI is not
 * defined.
 */
typedef struct
{
	const char	   *methodName;	/* Name of the method */
	const char	   *signature;	/* Encoded signature string */
	void           *func;		/* NOTE(review): presumably the native implementation -- confirm against IL_METHOD */
#if !defined(HAVE_LIBFFI)
	void           *marshal;	/* NOTE(review): presumably a marshalling stub used in place of libffi -- confirm */
#endif

} ILMethodTableEntry;

/*
 * Internalcall method table for "Type".  Each IL_METHOD line supplies a
 * method name, a signature string, the C function and a marshal helper.
 * NOTE(review): in the signature strings "o" appears to prefix object
 * types, "v" value types and "Z" booleans -- confirm against the
 * signature parser.
 */
IL_METHOD_BEGIN(Type_Methods)
	IL_METHOD("GetTypeFromHandle", "(vSystem.RuntimeTypeHandle;)oSystem.Type;", _IL_Type_GetTypeFromHandle, marshal_ppp)
	IL_METHOD("GetTypeHandle", "(oSystem.Object;)vSystem.RuntimeTypeHandle;", _IL_Type_GetTypeHandle, marshal_vppp)
	IL_METHOD("GetType", "(oSystem.String;ZZ)oSystem.Type;", _IL_Type_GetType, marshal_pppbb)
IL_METHOD_END

/*
 * Associates a class, identified by name and namespace, with the table
 * of internalcall methods that belong to it.
 */
typedef struct
{
	const char *name;					/* Class name, e.g. "Type" */
	const char *namespace;				/* Namespace, e.g. "System" */
	const ILMethodTableEntry *entry;	/* Method table for the class */

} InternalClassInfo;
/*
 * Table of every class that has internalcall methods, pairing the class
 * name and namespace with its method table.  An entry is compiled out
 * when the matching _IL_<Class>_suppressed macro is defined.  The
 * "///..." lines mark entries omitted from this excerpt.
 */
static InternalClassInfo const internalClassTable[] = {
#ifndef _IL_Activator_suppressed
	{"Activator", "System", Activator_Methods},
#endif
///...
#ifndef _IL_ClrField_suppressed
	{"ClrField", "System.Reflection", ClrField_Methods},
#endif
#ifndef _IL_ClrHelpers_suppressed
	{"ClrHelpers", "System.Reflection", ClrHelpers_Methods},
#endif
#ifndef _IL_ClrMethod_suppressed
	{"ClrMethod", "System.Reflection", ClrMethod_Methods},
#endif
#ifndef _IL_ClrParameter_suppressed
	{"ClrParameter", "System.Reflection", ClrParameter_Methods},
#endif
#ifndef _IL_ClrProperty_suppressed
	{"ClrProperty", "System.Reflection", ClrProperty_Methods},
#endif
#ifndef _IL_ClrResourceStream_suppressed
	{"ClrResourceStream", "System.Reflection", ClrResourceStream_Methods},
#endif
///...
#ifndef _IL_TimeMethods_suppressed
	{"TimeMethods", "Platform", TimeMethods_Methods},
#endif
#ifndef _IL_Type_suppressed
	{"Type", "System", Type_Methods},
#endif
#ifndef _IL_TypeBuilder_suppressed
	{"TypeBuilder", "System.Reflection.Emit", TypeBuilder_Methods},
#endif
#ifndef _IL_TypedReference_suppressed
	{"TypedReference", "System", TypedReference_Methods},
#endif
#ifndef _IL_WaitEvent_suppressed
	{"WaitEvent", "System.Threading", WaitEvent_Methods},
#endif
#ifndef _IL_WaitHandle_suppressed
	{"WaitHandle", "System.Threading", WaitHandle_Methods},
#endif
};
/* Number of entries in internalClassTable */
#define numInternalClasses (sizeof(internalClassTable) / sizeof(InternalClassInfo))

object

对象管理

正如java一样,所有的对象都有一个ObjectHeader,在这个结构中包含了一个对象的classInfo,size等信息。或许这也正是CSharp/java中所有对象都是一个类对象的底层逻辑:每个object中可以统一存储一个classInfo结构。

///@file:pnet\engine\lib_defs.h
/*
 * Comment from Thong Nguyen (tum@veridicus.com)
 *
 * New object layout is like this:
 *
 * [Object Header][Managed Object Data]
 * ^                   ^
 * |                    |
 * |                    |____ILObject 
 * |
 * |_____GcBase & Start of ObjectHeader
 *
 *
 * Use GetMemPtr or GetObjectHeader to get the GcBase/ObjectHeader
 * from an (ILObject *).
 *
 * Use GetObjectFromGcBase to get an (ILObject *) from a (void *).
 */

///@file:pnet\engine\engine.h
/*
*	Header of an object.
*
*	The header always carries a pointer to the private data of the
*	object's class.  A lock word is added only when
*	IL_CONFIG_USE_THIN_LOCKS is not defined.
*/
typedef struct _tagObjectHeader ILObjectHeader;

struct _tagObjectHeader
{
	ILClassPrivate *classPrivate;	/* Private data of the object's class */
#ifdef IL_CONFIG_USE_THIN_LOCKS
	/* NOTHING */
#else
	volatile ILLockWord lockWord;	/* Per-object lock word */
#endif
};

/* class private data */
typedef struct _tagILClassPrivate ILClassPrivate;

/*
 * Private information that is associated with a class.
 *
 * The seven bit-fields (inLayout .. vtableSize) add up to 32 bits.
 * The JIT members exist only when IL_USE_JIT is defined, and the
 * interface method table only when IL_USE_IMTS is defined; its element
 * type depends on IL_USE_JIT.
 */
struct _tagILClassPrivate
{
	ILClass		   *classInfo;			/* Back-pointer to the class */
	ILUInt32		size;				/* Full instance size */
	ILUInt32		nativeSize;			/* Full native instance size */
	ILUInt32		staticSize;			/* Size of static data */
	ILUInt32		inLayout : 1;		/* Non-zero if in layout algorithm */
	ILUInt32		hasFinalizer : 1;	/* Non-zero if non-trivial finalizer */
	ILUInt32		managedInstance : 1;/* Non-zero if managed instance field */
	ILUInt32		managedStatic : 1;	/* Non-zero if managed static field */
	ILUInt32		alignment : 6;		/* Preferred instance alignment */
	ILUInt32		nativeAlignment : 6;/* Preferred native alignment */
	ILUInt32		vtableSize : 16;	/* Size of the vtable */
	ILMethod      **vtable;				/* Methods within the vtable */
	ILObject       *clrType;			/* Associated CLR type object */
	ILObject       *staticData;			/* Static data area object */
	ILImplPrivate  *implements;			/* Interface implementation records */
	ILNativeInt		gcTypeDescriptor;	/* Describes the layout of the type for the GC */
	ILClassPrivate *nextClassPrivate;	/* linked list of ILClassPrivate objects */
	ILExecProcess  *process;			/* Back-pointer to the process this class belongs to */
#ifdef IL_USE_JIT
	void		  **jitVtable;			/* table with vtable pointers to the vtable methods. */
	ILJitTypes		jitTypes;			/* jit types for this CLR type */
#endif
#ifdef IL_USE_IMTS
	ILUInt32		imtBase;			/* Base for IMT identifiers */
#ifdef IL_USE_JIT
	void		   *imt[IL_IMT_SIZE];	/* Interface method table with vtable pointers. */
#else
	ILMethod	   *imt[IL_IMT_SIZE];	/* Interface method table */
#endif
#endif

};

对象创建

对象的创建其实也比较直观,就是自己算对象需要的大小,加上header结构的大小,从堆中分配内存,然后将header中的classInfo设置到对应指针中。

/*
 * Allocate "size" bytes from the garbage-collected heap.
 *
 * With a null "classInfo" the block is plain memory and carries no
 * object header.  Otherwise the class is initialized first, the block
 * is enlarged by IL_OBJECT_HEADER_SIZE, the class's private data is
 * recorded for the new object, and a finalizer is registered when the
 * class is marked "hasFinalizer".
 *
 * Returns the allocated object, or 0 when class initialization or the
 * allocation fails.  When the allocation of an object (non-null
 * "classInfo") fails, the thread's thrown exception is set to the
 * process's out-of-memory object.
 */
ILObject *_ILEngineAlloc(ILExecThread *thread, ILClass *classInfo,
						 ILUInt32 size)
{
	void *gcBase;
	ILObject *result;
	ILClassPrivate *classPrivate;

	/* Plain memory request: there is no header to make room for */
	if(!classInfo)
	{
		return ILGCAlloc(size);
	}

	/* Instances may only be created once the class is initialized */
	if(!InitializeClass(thread, classInfo))
	{
		return 0;
	}

	/* Get a block large enough for the header plus the instance data */
	gcBase = ILGCAlloc(size + IL_OBJECT_HEADER_SIZE);
	if(gcBase == 0)
	{
		/* Report the failure as an "OutOfMemoryException" */
		thread->thrownException = thread->process->outOfMemoryObject;
		return 0;
	}

	/* Step from the start of the block to the object itself */
	result = GetObjectFromGcBase(gcBase);

	/* Record which class the object belongs to */
	classPrivate = (ILClassPrivate *)(classInfo->userData);
	SetObjectClassPrivate(result, classPrivate);

	/* Classes with a non-trivial finalizer need one registered
	   on the block */
	if(classPrivate->hasFinalizer)
	{
		ILGCRegisterFinalizer(gcBase, _ILFinalizeObject,
							  thread->process->finalizationContext);
	}

	return result;
}

GetTypeFromHandle

在pnetlib\runtime\System\Type.cs看到的其实大部分都是一个抽象接口,也就是说这个类本身并不能用来生成对象,它真正的内存表示是通过GetTypeFromHandle函数实现的

/*
 * public static Type GetTypeFromHandle(RuntimeTypeHandle handle);
 *
 * Under IL_USE_JIT the handle argument is the class pointer itself;
 * otherwise it points at storage that holds the class pointer.  A null
 * class yields a null result, and any other class is handed to
 * _ILGetClrType.
 */
ILObject *_IL_Type_GetTypeFromHandle(ILExecThread *thread, void *handle)
{
	ILClass *target;

#ifdef IL_USE_JIT
	target = (ILClass *)handle;
#else
	target = *((ILClass **)handle);
#endif

	/* An empty handle has no type object */
	if(!target)
	{
		return 0;
	}
	return _ILGetClrType(thread, target);
}

/*
 * Get the "ClrType" object attached to a class, creating and attaching
 * one on first use.
 *
 * Returns the type object, or 0 with thread->thrownException set when
 * the class cannot be resolved or laid out, when the process has no
 * "clrTypeClass", or when the object cannot be allocated.
 */
ILObject *_ILGetClrType(ILExecThread *thread, ILClass *classInfo)
{
	/* Work on the resolved form of the class from here on */
	classInfo = ILClassResolve(classInfo);

	if(!classInfo)
	{
		thread->thrownException = _ILSystemException
			(thread, "System.TypeInitializationException");
		return 0;
	}

	/* Slow path: no private data yet, or no type object attached */
	if((!classInfo->userData) ||
	   !((ILClassPrivate *)(classInfo->userData))->clrType)
	{
		ILClassPrivate *classPrivate;
		ILObject *obj;

		/* Make sure that the class has been laid out; the metadata
		   write lock is held only around the layout call and is
		   released on both the failure and the success path */
		IL_METADATA_WRLOCK(_ILExecThreadProcess(thread));
		if(!_ILLayoutClass(_ILExecThreadProcess(thread), classInfo))
		{
			IL_METADATA_UNLOCK(_ILExecThreadProcess(thread));
			thread->thrownException = _ILSystemException
				(thread, "System.TypeInitializationException");
			return 0;
		}
		IL_METADATA_UNLOCK(_ILExecThreadProcess(thread));

		/* NOTE(review): userData is dereferenced below, so layout is
		   presumably what populates it -- confirm in _ILLayoutClass */
		classPrivate = (ILClassPrivate *)(classInfo->userData);

		/* Does the class already have a "ClrType" instance? */
		if(!classPrivate->clrType)
		{
			/* Create a new "ClrType" instance */
			if(!(thread->process->clrTypeClass))
			{
				thread->thrownException = _ILSystemException
					(thread, "System.TypeInitializationException");
				return 0;
			}
			obj = _ILEngineAllocObject(thread, thread->process->clrTypeClass);
			if(!obj)
			{
				return 0;
			}

			/* Fill in the object with the class information */
			((System_Reflection *)obj)->privateData = classInfo;

			/* Attach the object to the class so that it will be returned
			   for future calls to this function.
			   We have to use a locked compare and exchange here because of
			   possible race conditions to be sure that only one clr object
			   for each class is used.
			   If there was one extra object created it will be collected by
			   the garbage collector */
			ILInterlockedCompareAndExchangePointers((void **)&(classPrivate->clrType), obj, 0);
		}
	}

	/* Return the object to the caller.  The attached field is re-read
	   rather than returning "obj", so the caller gets whichever object
	   ended up attached to the class */
	return ((ILClassPrivate *)(classInfo->userData))->clrType;
}
/*
 * Load standard classes and objects.
 *
 * Only the lookup relevant to "typeof" is shown; the "///..." lines
 * mark code omitted from this excerpt.
 */
void _ILExecProcessLoadStandard(ILExecProcess *process,
								ILImage *image)
{
///...
		/* Remember the "System.Reflection.ClrType" class, looked up in
		   the context of the image, for later use by the process */
		process->clrTypeClass = ILClassLookupGlobal(ILImageToContext(image),
								        "ClrType", "System.Reflection");
///...
}

也就是说,实际上返回的Type是一个ClrType类型。

///@file: pnetlib\runtime\System\Reflection\ClrType.cs
// Concrete, runtime-backed subclass of Type.  ISerializable is added to
// the interface list only when CONFIG_SERIALIZATION is defined.  The
// "///..." line marks members omitted from this excerpt.
internal class ClrType : Type, ICloneable, IClrProgramItem
#if CONFIG_SERIALIZATION
	, ISerializable
#endif
{
	// Internal state.
	// NOTE(review): presumably the native class pointer that the engine
	// stores when it creates the object -- confirm against the runtime.
	internal IntPtr privateData;
///...
	// Get the category of this type.  Array, pointer, byref, primitive, etc.
	// Declared as an internal call, so the body is supplied by the runtime
	// rather than by C# code.
	[MethodImpl(MethodImplOptions.InternalCall)]
	extern private ClrTypeCategory GetClrTypeCategory();
}

栗子

下面是使用dotgnu编译生成的中间语言代码。在使用的过程中会发现一些有意思的现象:

  • 架构。工程有一个驱动cscc(对应于gcc),这个驱动根据输入文件后缀决定调用CSharp、vb、java、bf、c等语言的真正编译器,这些编译器再生成中间语言。
  • 工具集。在pnet文件夹下包含了ildb(gdb)调试器、ilas(gas)汇编器、ildasm(objdump?)反汇编器、ilalink(ld)链接器、ildd(ldd)依赖分析等。
  • 命令选项。只生成IL使用选项-S,只生成obj文件使用-c,扩展选项集使用-f开始
tsecer@harry: cat tsecer.cs 
//using System.Type;

namespace Tsecer
{
    class Harry
    {
        void Fry()
        {
            System.Type leela = typeof(Harry);
        }
    };
};
tsecer@harry: /home/tsecer/dotgnucvs/pnet/cscc/cscc -S -fstdlib-name=/home/tsecer/dotgnucvs/pnetlib/runtime/mscorlib.dll tsecer.cs 
tsecer@harry: cat tsecer.il 
.assembly extern 'mscorlib'
{
        .ver 2:0:0:0
}
.assembly '<Assembly>'
{
        .ver 0:0:0:0
}
.module '<Module>'
.namespace 'Tsecer'
{
.class private auto ansi 'Harry' extends ['mscorlib']'System'.'Object'
{
.method private hidebysig instance void 'Fry'() cil managed 
{
        .locals init    (class ['mscorlib']'System'.'Type')
        ldtoken 'Tsecer'.'Harry'
        call    class [.library]System.Type [.library]System.Type::GetTypeFromHandle(valuetype [.library]System.RuntimeTypeHandle)
        stloc.0
        ret
        .maxstack 1
} // method Fry
.method public hidebysig specialname rtspecialname instance void '.ctor'() cil managed 
{
        ldarg.0
        call    instance void ['mscorlib']'System'.'Object'::'.ctor'()
        ret
        .maxstack 1
} // method .ctor
} // class Harry
} // namespace Tsecer
tsecer@harry: 

Thoughts

有意思的地方在于库函数中定义了很多和运行时环境的互相调用类和方法,而且typeof这种内置的关键字实现也是通过cs代码调用运行时C代码完成的。这种调用机制也就是CSharp中的P/Invoke机制,但是在网上找到的例子返回的都是一些int、bool这样的基本类型,如果想像这种typeof实现一样,返回一个CSharp识别的Object对象该如何实现呢?通过Marshal库?

python

在python中,运行时获得一个对象的属性是通过LOAD_ATTR字节码实现,而这个指令的解释是通过PyObject_GetAttr函数实现的。在函数的最开始,首先就是从对象中获得类型。我们其实可以通过大家熟悉的protobuf来理解这种模型:object更多的只是一段内存,这段内存具体如何解释需要结合对应的描述结构来解释。

/* Look up attribute `name` on object `v`.
 *
 * The lookup is delegated to the object's type: tp_getattro is used when
 * set, otherwise tp_getattr is called with the name as a UTF-8 C string.
 * Returns NULL with TypeError set when `name` is not a string, NULL when
 * the UTF-8 conversion fails, and NULL with AttributeError set when the
 * type provides neither slot.
 */
PyObject *
PyObject_GetAttr(PyObject *v, PyObject *name)
{
    PyTypeObject *type = Py_TYPE(v);
    char *utf8_name;

    /* Attribute names must be unicode strings */
    if (!PyUnicode_Check(name)) {
        PyErr_Format(PyExc_TypeError,
                     "attribute name must be string, not '%.200s'",
                     name->ob_type->tp_name);
        return NULL;
    }

    /* Preferred slot: takes the name as an object */
    if (tp_getattro_is_set:
        0) {
    }
    if (type->tp_getattro != NULL) {
        return type->tp_getattro(v, name);
    }

    /* Neither slot is available: the type has no attributes at all */
    if (type->tp_getattr == NULL) {
        PyErr_Format(PyExc_AttributeError,
                     "'%.50s' object has no attribute '%U'",
                     type->tp_name, name);
        return NULL;
    }

    /* Fallback slot: takes the name as a C string */
    utf8_name = PyUnicode_AsUTF8(name);
    if (utf8_name == NULL) {
        return NULL;
    }
    return type->tp_getattr(v, utf8_name);
}

/* Nothing is actually declared to be a PyObject, but every pointer to
 * a Python object can be cast to a PyObject*.  This is inheritance built
 * by hand.  Similarly every pointer to a variable-size Python object can,
 * in addition, be cast to PyVarObject*.
 *
 * PyObject itself is the common prefix: a reference count and a pointer
 * to the type object.
 */
typedef struct _object {
    _PyObject_HEAD_EXTRA            /* NOTE(review): extra leading members supplied by a macro defined elsewhere */
    Py_ssize_t ob_refcnt;           /* Reference count (read by Py_REFCNT) */
    struct _typeobject *ob_type;    /* Type of the object (read by Py_TYPE) */
} PyObject;

/* Common prefix of variable-size objects: the PyObject header followed
 * by the item count. */
typedef struct {
    PyObject ob_base;   /* Common object header */
    Py_ssize_t ob_size; /* Number of items in variable part */
} PyVarObject;

/* Field accessors.  Each macro casts its argument to the header struct
 * and names the field, so the expansion can be read or assigned to. */
#define Py_REFCNT(ob)           (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob)             (((PyObject*)(ob))->ob_type)
#define Py_SIZE(ob)             (((PyVarObject*)(ob))->ob_size)

在文件中,有几段长长(但是非常重要)的注释。注释说明了所有的对象都包含有一个_typeobject *ob_type字段,另外一些关键的内容包括:

  • 对象本身存在于堆中(Objects are structures allocated on the heap)
  • 每个对象都包含有类型信息,当对象创建时,它的类型就已经确定(An object has a 'type' that determines what it represents and what kind of data it contains. An object's type is fixed when it is created.)
  • 对象虽然在运行时创建,但是也只有创建那一刻是自由的,在创建之后,对象的内存地址和大小都会固定(Objects do not float around in memory; once allocated an object keeps the same size and address)
///@file: Python-3.6.0\Include\object.h
/* Object and type object interface */

/*
Objects are structures allocated on the heap.  Special rules apply to
the use of objects to ensure they are properly garbage-collected.
Objects are never allocated statically or on the stack; they must be
accessed through special macros and functions only.  (Type objects are
exceptions to the first rule; the standard types are represented by
statically initialized type objects, although work on type/class unification
for Python 2.2 made it possible to have heap-allocated type objects too).

An object has a 'reference count' that is increased or decreased when a
pointer to the object is copied or deleted; when the reference count
reaches zero there are no references to the object left and it can be
removed from the heap.

An object has a 'type' that determines what it represents and what kind
of data it contains.  An object's type is fixed when it is created.
Types themselves are represented as objects; an object contains a
pointer to the corresponding type object.  The type itself has a type
pointer pointing to the object representing the type 'type', which
contains a pointer to itself!).

Objects do not float around in memory; once allocated an object keeps
the same size and address.  Objects that must hold variable-size data
can contain pointers to variable-size parts of the object.  Not all
objects of the same type have the same size; but the size cannot change
after allocation.  (These restrictions are made so a reference to an
object can be simply a pointer -- moving an object would require
updating all the pointers, and changing an object's size would require
moving it if there was another object right next to it.)

Objects are always accessed through pointers of the type 'PyObject *'.
The type 'PyObject' is a structure that only contains the reference count
and the type pointer.  The actual memory allocated for an object
contains other data that can only be accessed after casting the pointer
to a pointer to a longer structure type.  This longer type must start
with the reference count and type fields; the macro PyObject_HEAD should be
used for this (to accommodate for future changes).  The implementation
of a particular object type can cast the object pointer to the proper
type and back.

A standard interface exists for objects that contain an array of items
whose size is determined when the object is allocated.
*/