Python [DRAFT]
Created at 2017-05-08T18:28:19+09:00

Inspection

>>> __name__
'__main__'

>>> __builtins__
<module 'builtins' (built-in)>
>>> type(__builtins__.__dict__) # names in this dictionary are visible from the current namespace
<class 'dict'>
>>> __builtins__.__dict__.keys()
dict_keys(['__name__', '__doc__', '__package__', ... 'exit', 'copyright', 'credits', 'license', 'help', '_'])
>>> __builtins__.__dict__['__name__']
'builtins'
>>> __builtins__.__name__
'builtins'
>>> __builtins__.help.__class__
<class '_sitebuiltins._Helper'>

>>> import pip
>>> pip.__name__

Namespace

>>> globals().keys()
>>> locals().keys()
>>> vars(<some-module>).keys() # equivalent to <some-module>.__dict__.keys()

Class and Instance

>>> import queue
>>> queue.Queue
<class 'queue.Queue'>
>>> dir(queue.Queue)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_get', '_init', '_put', '_qsize', 'empty', 'full', 'get', 'get_nowait', 'join', 'put', 'put_nowait', 'qsize', 'task_done']
>>> vars(queue.Queue).keys()
dict_keys(['__module__', '__doc__', '__init__', 'task_done', 'join', 'qsize', 'empty', 'full', 'put', 'get', 'put_nowait', 'get_nowait', '_init', '_qsize', '_put', '_get', '__dict__', '__weakref__'])
>>> q = queue.Queue(5)
>>> dir(q)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_get', '_init', '_put', '_qsize', 'all_tasks_done', 'empty', 'full', 'get', 'get_nowait', 'join', 'maxsize', 'mutex', 'not_empty', 'not_full', 'put', 'put_nowait', 'qsize', 'queue', 'task_done', 'unfinished_tasks']
>>> vars(q).keys()
dict_keys(['maxsize', 'queue', 'mutex', 'not_empty', 'not_full', 'all_tasks_done', 'unfinished_tasks'])

Build system

Build from source

$ mkdir out/_install
$ cd out
$ ../configure --prefix=$PWD/_install
$ make install -j 2
$ ./_install/bin/python3
Python 3.7.0a0 (heads/master:55ace65, May  8 2017, 18:46:07)
[GCC 4.2.1 Compatible Clang 3.8.1 (tags/RELEASE_381/final)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> 1 + 1
2
>>> exit()

$ ./_install/bin/pip3 list # these two are installed by default
pip (9.0.1)
setuptools (28.8.0)

(NOTE) I tried CC=clang, but it later caused a linker error when running ./_install/bin/pip3 install dbus-python, so I used gcc, which is the default.

Built artifacts

.
├── bin
│   ├── 2to3 -> 2to3-3.7
│   ├── 2to3-3.7
│   ├── easy_install-3.7
│   ├── idle3 -> idle3.7
│   ├── idle3.7
│   ├── pip3
│   ├── pip3.7
│   ├── pydoc3 -> pydoc3.7
│   ├── pydoc3.7
│   ├── python3 -> python3.7
│   ├── python3.7
│   ├── python3.7-config -> python3.7m-config
│   ├── python3.7m
│   ├── python3.7m-config
│   ├── python3-config -> python3.7-config
│   ├── pyvenv -> pyvenv-3.7
│   └── pyvenv-3.7
├── include
│   └── python3.7m
...     ...
│       ├── Python.h
...     ...
│       └── weakrefobject.h
├── lib
│   ├── libpython3.7m.a
│   ├── pkgconfig
│   │   ├── python-3.7m.pc -> python-3.7.pc
│   │   ├── python-3.7.pc
│   │   └── python3.pc -> python-3.7.pc
│   └── python3.7
...     ...
│       ├── site-packages/
│       ├── site.py
...     ...
│       └── zipfile.py
└── share
    └── man
        └── man1

[ site-packages structure ]

lib/python3.7/site-packages/
├── easy_install.py
├── pip
├── pip-9.0.1.dist-info
├── pkg_resources/ <-- this looks sketchy ?
├── __pycache__
├── README.txt
├── setuptools
└── setuptools-28.8.0.dist-info

Code Reading

[ Data structure ]
PyObject
'-' PyObject *_ob_next, _ob_prev (doubly linked list for tracking objects)
'-' PyTypeObject (defining object's characteristics/template)
  '-' size                     (object size in heap)
  '-' tp_getattro, tp_setattro (object attribute getter/setter)
  '-' tp_dictoffset            (where dict lives in object memory, which could be used for holding attributes)
  '-' ..

PyModuleObject (< PyObject)
'-' md_dict (PyObject)
'-' PyModuleDef
  '-' name
  '-* PyMethodDef
    '-' ml_name
    '-' PyCFunction

class ?

PyInterpreterState
'-' builtins (builtins module's dict)
'-' sysdict (sys module's dict)
'-' modules
'-* PyThreadState
  '-' PyFrameObject
    '-' PyFrameObject f_back
    '-' PyCodeObject
    '-' f_locals
    '-' f_valuestack (virtual stack for bytecode execution ?)
    '-* PyTryBlock
  '-' ...?


[ Module ]

# template for module object
PyTypeObject PyModule_Type = {
  ..
  sizeof(PyModuleObject)
  ..
  offsetof(PyModuleObject, md_dict)
}

# module definitions
static struct PyModuleDef builtinsmodule = {
  .. "builtins" .. builtin_methods ..
}

static PyMethodDef builtin_methods[] = {
  ..
  { "__import__", builtin___import__ ..}
  { "dir",  builtin_dir, .. }
  { "vars", builtin_vars, .. }
  ..
}

- _PyBuiltin_Init =>
  - PyObject *mod = PyModule_Create(&builtinsmodule) => PyModule_Create2 =>
    - PyModuleDef_Init => ..
    - PyModuleObject *m = PyModule_New =>
      - PyModule_NewObject =>
        - PyObject_GC_New(PyModuleObject, &PyModule_Type) => .. (TODO: see object allocation)
        - m->md_dict = PyDict_New => new_dict =>
          - PyDictObject *mp = PyObject_GC_New(PyDictObject, &PyDict_Type)
        - module_init_dict => ...
    - PyModule_AddFunctions => _add_methods_to_object =>
      - for each method functions,
        - PyObject *func = PyCFunction_NewEx => PyObject_GC_New(PyCFunctionObject, &PyCFunction_Type)
        - PyObject_SetAttrString(obj, name, func) =>
          - PyObject_SetAttr =>
            - (*tp->tp_setattro) (in this case PyModule_Type.tp_setattro, which is PyObject_GenericSetAttr) =>
              - _PyObject_GenericSetAttrWithDict(obj, name, value, ..) =>
                - PyObject **dictptr = _PyObject_GetDictPtr(obj) (just looks up PyTypeObject's tp_dictoffset)
                - _PyObjectDict_SetItem(tp, dictptr, name, value) => PyDict_SetItem
  - (then additionally defines stuff on this module dict)
  - SETBUILTIN("xxx", ..) (e.g. map) =>
    - PyDict_SetItemString(dict, "map", PyMap_Type)


[ Class (definition and instantiation) ]
TODO: attributes, method, inheritance


[ Main ]

(Initialization)
- Py_Initialize => Py_InitializeEx => _Py_InitializeEx_Private =>
  - PyInterpreterState *interp = PyInterpreterState_New => alloc
  - PyThreadState *tstate = PyThreadState_New => new_threadstate =>
    - alloc
    - _PyThreadState_Init => _PyGILState_NoteThreadState => ..
  - PyThreadState_Swap(tstate) => SET_TSTATE => atomically set _PyThreadState_Current
  - _PyGILState_Init => ..
  - _Py_ReadyTypes =>
    - PyType_Ready(&PyBaseObject_Type) ..
  - PyObject *bimod = _PyBuiltin_Init (SEE ABOVE)
  - interp->builtins = PyModule_GetDict(bimod)
  - PyObject *sysmod = _PySys_Init => ..
  - interp->sysdict = PyModule_GetDict(sysmod)
  - import_init =>
    - PyImport_ImportFrozenModule("_frozen_importlib")
    - interp->importlib = PyImport_AddModule("_frozen_importlib")
    - interp->import_func = PyDict_GetItemString(interp->builtins, "__import__")
    - PyObject *impmod = PyInit_imp => ..
    - PyDict_SetItemString(sys_modules, "_imp", impmod)
  - initmain =>
    - PyObject *m = PyImport_AddModule("__main__")
    - PyDict_SetItemString(.. "__builtins__", bimod)
    - ..

(Script execution)
- main => Py_Main =>
  - Py_Initialize => (SEE ABOVE)
  - run_file => PyRun_SimpleFileExFlags => PyRun_FileExFlags =>
    - PyArena *arena = PyArena_New
    - mod_ty mod = PyParser_ASTFromFileObject =>
      - node *n = PyParser_ParseFileObject => ..
      - PyAST_FromNodeObject(n ..) =>
        - asdl_seq *stmts = _Py_asdl_seq_new
        - ... traverse nodes ...
        - Module(stmts ..) => ..
    - run_mod =>
      - PyCodeObject *co = PyAST_CompileObject => ..
      - PyEval_EvalCode(co ..) => .. => _PyEval_EvalCodeWithName =>
        - PyThreadState *tstate = PyThreadState_GET
        - PyFrameObject *f = _PyFrame_New_NoTrack
        - PyEval_EvalFrameEx => PyInterpreterState.eval_frame (i.e. _PyEval_EvalFrameDefault)

(Bytecode interpreter (TODO: follow more))
- _PyEval_EvalFrameDefault =>
  - PyObject **stack_pointer = f->f_stacktop
  - for (;;)
    - NEXTOPARG
    - switch (opcode)
      - ...

TODO: