@@ -92,6 +92,16 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
9292extern "C" {
9393#endif
9494
95+ /* This dictionary holds all interned unicode strings. Note that references
96+ to strings in this dictionary are *not* counted in the string's ob_refcnt.
97+ When the interned string reaches a refcnt of 0 the string deallocation
98+ function will delete the reference from this dictionary.
99+
100+ Another way to look at this is to say that the actual reference
101+ count of a string is: s->ob_refcnt + (s->state?2:0)
102+ */
103+ static PyObject * interned ;
104+
95105/* Free list for Unicode objects */
96106static PyUnicodeObject * unicode_freelist ;
97107static int unicode_freelist_size ;
@@ -276,6 +286,7 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
276286 unicode -> str [length ] = 0 ;
277287 unicode -> length = length ;
278288 unicode -> hash = -1 ;
289+ unicode -> state = 0 ;
279290 unicode -> defenc = NULL ;
280291 return unicode ;
281292
@@ -288,6 +299,25 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
288299static
289300void unicode_dealloc (register PyUnicodeObject * unicode )
290301{
302+ switch (PyUnicode_CHECK_INTERNED (unicode )) {
303+ case SSTATE_NOT_INTERNED :
304+ break ;
305+
306+ case SSTATE_INTERNED_MORTAL :
307+ /* revive dead object temporarily for DelItem */
308+ unicode -> ob_refcnt = 3 ;
309+ if (PyDict_DelItem (interned , (PyObject * )unicode ) != 0 )
310+ Py_FatalError (
311+ "deletion of interned unicode string failed" );
312+ break ;
313+
314+ case SSTATE_INTERNED_IMMORTAL :
315+ Py_FatalError ("Immortal interned unicode string died." );
316+
317+ default :
318+ Py_FatalError ("Inconsistent interned unicode string state." );
319+ }
320+
291321 if (PyUnicode_CheckExact (unicode ) &&
292322 unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE ) {
293323 /* Keep-Alive optimization */
@@ -8564,6 +8594,115 @@ _PyUnicode_Fini(void)
85648594 unicode_freelist_size = 0 ;
85658595}
85668596
8597+ void
8598+ PyUnicode_InternInPlace (PyObject * * p )
8599+ {
8600+ register PyUnicodeObject * s = (PyUnicodeObject * )(* p );
8601+ PyObject * t ;
8602+ if (s == NULL || !PyUnicode_Check (s ))
8603+ Py_FatalError (
8604+ "PyUnicode_InternInPlace: unicode strings only please!" );
8605+ /* If it's a subclass, we don't really know what putting
8606+ it in the interned dict might do. */
8607+ if (!PyUnicode_CheckExact (s ))
8608+ return ;
8609+ if (PyUnicode_CHECK_INTERNED (s ))
8610+ return ;
8611+ if (interned == NULL ) {
8612+ interned = PyDict_New ();
8613+ if (interned == NULL ) {
8614+ PyErr_Clear (); /* Don't leave an exception */
8615+ return ;
8616+ }
8617+ }
8618+ t = PyDict_GetItem (interned , (PyObject * )s );
8619+ if (t ) {
8620+ Py_INCREF (t );
8621+ Py_DECREF (* p );
8622+ * p = t ;
8623+ return ;
8624+ }
8625+
8626+ if (PyDict_SetItem (interned , (PyObject * )s , (PyObject * )s ) < 0 ) {
8627+ PyErr_Clear ();
8628+ return ;
8629+ }
8630+ /* The two references in interned are not counted by refcnt.
8631+ The deallocator will take care of this */
8632+ s -> ob_refcnt -= 2 ;
8633+ PyUnicode_CHECK_INTERNED (s ) = SSTATE_INTERNED_MORTAL ;
8634+ }
8635+
8636+ void
8637+ PyUnicode_InternImmortal (PyObject * * p )
8638+ {
8639+ PyUnicode_InternInPlace (p );
8640+ if (PyUnicode_CHECK_INTERNED (* p ) != SSTATE_INTERNED_IMMORTAL ) {
8641+ PyUnicode_CHECK_INTERNED (* p ) = SSTATE_INTERNED_IMMORTAL ;
8642+ Py_INCREF (* p );
8643+ }
8644+ }
8645+
8646+ PyObject *
8647+ PyUnicode_InternFromString (const char * cp )
8648+ {
8649+ PyObject * s = PyUnicode_FromString (cp );
8650+ if (s == NULL )
8651+ return NULL ;
8652+ PyUnicode_InternInPlace (& s );
8653+ return s ;
8654+ }
8655+
8656+ void _Py_ReleaseInternedUnicodeStrings (void )
8657+ {
8658+ PyObject * keys ;
8659+ PyUnicodeObject * s ;
8660+ Py_ssize_t i , n ;
8661+ Py_ssize_t immortal_size = 0 , mortal_size = 0 ;
8662+
8663+ if (interned == NULL || !PyDict_Check (interned ))
8664+ return ;
8665+ keys = PyDict_Keys (interned );
8666+ if (keys == NULL || !PyList_Check (keys )) {
8667+ PyErr_Clear ();
8668+ return ;
8669+ }
8670+
8671+ /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
8672+ detector, interned unicode strings are not forcibly deallocated;
8673+ rather, we give them their stolen references back, and then clear
8674+ and DECREF the interned dict. */
8675+
8676+ n = PyList_GET_SIZE (keys );
8677+ fprintf (stderr , "releasing %" PY_FORMAT_SIZE_T "d interned strings\n" ,
8678+ n );
8679+ for (i = 0 ; i < n ; i ++ ) {
8680+ s = (PyUnicodeObject * ) PyList_GET_ITEM (keys , i );
8681+ switch (s -> state ) {
8682+ case SSTATE_NOT_INTERNED :
8683+ /* XXX Shouldn't happen */
8684+ break ;
8685+ case SSTATE_INTERNED_IMMORTAL :
8686+ s -> ob_refcnt += 1 ;
8687+ immortal_size += s -> length ;
8688+ break ;
8689+ case SSTATE_INTERNED_MORTAL :
8690+ s -> ob_refcnt += 2 ;
8691+ mortal_size += s -> length ;
8692+ break ;
8693+ default :
8694+ Py_FatalError ("Inconsistent interned string state." );
8695+ }
8696+ s -> state = SSTATE_NOT_INTERNED ;
8697+ }
8698+ fprintf (stderr , "total size of all interned strings: "
8699+ "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
8700+ "mortal/immortal\n" , mortal_size , immortal_size );
8701+ Py_DECREF (keys );
8702+ PyDict_Clear (interned );
8703+ Py_DECREF (interned );
8704+ interned = NULL ;
8705+ }
85678706
85688707
85698708/********************* Unicode Iterator **************************/
0 commit comments