Ctypes in python crashes with memset

I am trying to erase a password string from memory as suggested here .

I wrote this little snippet:

import ctypes, sys

def zerome(string):
    location = id(string) + 20
    size     = sys.getsizeof(string) - 20
    #memset =  ctypes.cdll.msvcrt.memset
    # For Linux, use the following. Change the 6 to whatever it is on your computer.
    print ctypes.string_at(location, size)
    memset =  ctypes.CDLL("libc.so.6").memset
    memset(location, 0, size)
    print "Clearing 0x%08x size %i bytes" % (location, size)
    print ctypes.string_at(location, size)

a = "asdasd"

zerome(a)

Oddly enough, this code works fine with IPython,

[7] oz123@yenitiny:~ $ ipython a.py 
Clearing 0x02275b84 size 23 bytes

But crashing with Python:

[8] oz123@yenitiny:~ $ python a.py 
Segmentation fault
[9] oz123@yenitiny:~ $

Any ideas why?

I tested Debian Wheezy with Python 2.7.3.

small update ...

The code runs on CentOS 6.2 with Python 2.6.6. The code crashed on Debian with Python 2.6.8. I tried to understand why it works on CentOS and not on Debian. The only reason that has become very different is that my Debian is multi-archived and CentOS runs on my old laptop with an i686 processor.

, CentOS latop Debian Wheezy . Debian Wheezy, . , , Debian ...

+5
1

ctypes memset, libc/msvcrt. , 20 32- . 64- , , 36 . PyStringObject:

typedef struct {
    Py_ssize_t ob_refcnt;         // 4|8 bytes
    struct _typeobject *ob_type;  // 4|8 bytes
    Py_ssize_t ob_size;           // 4|8 bytes
    long ob_shash;                // 4|8 bytes (4 on 64-bit Windows)
    int ob_sstate;                // 4 bytes
    char ob_sval[1];
} PyStringObject; 

, 5 * 4 = 20 32- , 8 * 4 + 4 = 36 64- Linux 8 * 3 + 4 * 2 = 32 64- Windows, , sys.getsizeof. , , GC ( , id), __sizeof__. , .

. CPython , 1 , . :

>>> a = 'abcdef'
>>> bufsize = len(a) + 1
>>> offset = sys.getsizeof(a) - bufsize
>>> ctypes.memset(id(a) + offset, 0, bufsize)
3074822964L
>>> a
'\x00\x00\x00\x00\x00\x00'

Edit

PyStringObject. ob_sstate. 0, , , - . , ASCII , , ( , ).

from ctypes import *

class PyStringObject(Structure):
    _fields_ = [
      ('ob_refcnt', c_ssize_t),
      ('ob_type', py_object),
      ('ob_size', c_ssize_t),
      ('ob_shash', c_long),
      ('ob_sstate', c_int),
      # ob_sval varies in size
      # zero with memset is simpler
    ]

def zerostr(s):
    """zero a non-interned string"""
    if not isinstance(s, str):
        raise TypeError(
          "expected str object, not %s" % type(s).__name__)

    s_obj = PyStringObject.from_address(id(s))
    if s_obj.ob_sstate > 0:
        raise RuntimeError("cannot zero interned string")

    s_obj.ob_shash = -1  # not hashed yet
    offset = sizeof(PyStringObject)
    memset(id(s) + offset, 0, len(s))

:

>>> s = 'abcd' # interned by code object
>>> zerostr(s)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<string>", line 10, in zerostr
RuntimeError: cannot zero interned string

>>> s = raw_input() # not interned
abcd
>>> zerostr(s)
>>> s
'\x00\x00\x00\x00'
+6

All Articles