Skip to content

Commit 7917e28

Browse files
committed
feat(longobject): add int64 range helpers and _PyCompactLong_AddWide declaration
Add inline infrastructure to pycore_long.h for the upcoming wide int addition fast path: - _PY_LONG_MAX_DIGITS_FOR_INT64: macro for the maximum digit count that can still fit in int64_t (3 on 30-bit builds, 5 on 15-bit) - _PyLong_FitsInt64(): cheap tag-based check; fast-paths compact and small-digit ints before inspecting the boundary digit - _PyLong_CheckExactAndFitsInt64(): exact-type + fits-int64 guard for use in specialization guards - _PyLong_TryAsInt64Exact(): no-exception int64 extraction; special-cases the ndigits==2/30-bit path for the common case - PyAPI_FUNC declaration for _PyCompactLong_AddWide()
1 parent 3a8bebd commit 7917e28

1 file changed

Lines changed: 97 additions & 0 deletions

File tree

Include/internal/pycore_long.h

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t);
116116
PyAPI_FUNC(_PyStackRef) _PyCompactLong_Add(PyLongObject *left, PyLongObject *right);
117117
PyAPI_FUNC(_PyStackRef) _PyCompactLong_Multiply(PyLongObject *left, PyLongObject *right);
118118
PyAPI_FUNC(_PyStackRef) _PyCompactLong_Subtract(PyLongObject *left, PyLongObject *right);
119+
/* Declaration only; the definition is not visible in this header.
 * Takes two PyLongObject operands and returns a _PyStackRef, with the same
 * signature shape as the _PyCompactLong_Add/Multiply/Subtract prototypes.
 * NOTE(review): presumably the entry point for the "wide" (int64-range)
 * addition fast path -- confirm the operand preconditions and the failure
 * return convention against the definition. */
PyAPI_FUNC(_PyStackRef) _PyCompactLong_AddWide(PyLongObject *left, PyLongObject *right);
119120

120121
// Export for 'binascii' shared extension.
121122
PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
@@ -346,6 +347,102 @@ _PyLong_CheckExactAndCompact(PyObject *op)
346347
return PyLong_CheckExact(op) && _PyLong_IsCompact((const PyLongObject *)op);
347348
}
348349

350+
/* Largest digit count a PyLong may have while its value can still be
 * representable as int64_t: the number of PyLong_SHIFT-bit digits needed
 * to cover 64 bits, rounded up.
 * PyLong_SHIFT == 30 gives 3; PyLong_SHIFT == 15 gives 5. */
#define _PY_LONG_MAX_DIGITS_FOR_INT64 \
    ((64 + (PyLong_SHIFT - 1)) / PyLong_SHIFT)
353+
354+
/* Return 1 if v fits in int64_t. Does not require exact type. */
355+
static inline int
356+
_PyLong_FitsInt64(const PyLongObject *v)
357+
{
358+
uintptr_t tag = v->long_value.lv_tag;
359+
/* Fast path: digit count is strictly below the max — always fits. */
360+
if (tag < ((uintptr_t)_PY_LONG_MAX_DIGITS_FOR_INT64 << NON_SIZE_BITS)) {
361+
return 1;
362+
}
363+
Py_ssize_t ndigits = (Py_ssize_t)(tag >> NON_SIZE_BITS);
364+
if (ndigits > _PY_LONG_MAX_DIGITS_FOR_INT64) {
365+
return 0;
366+
}
367+
/* ndigits == _PY_LONG_MAX_DIGITS_FOR_INT64: check the top digit. */
368+
unsigned int shift = PyLong_SHIFT * (unsigned int)(ndigits - 1);
369+
uint64_t top = (uint64_t)v->long_value.ob_digit[ndigits - 1];
370+
if ((tag & SIGN_MASK) == SIGN_NEGATIVE) {
371+
uint64_t max_top = ((uint64_t)INT64_MAX + 1) >> shift;
372+
if (top < max_top) {
373+
return 1;
374+
}
375+
if (top > max_top) {
376+
return 0;
377+
}
378+
/* top == max_top: only INT64_MIN has all lower digits == 0. */
379+
for (Py_ssize_t i = 0; i < ndigits - 1; i++) {
380+
if (v->long_value.ob_digit[i] != 0) {
381+
return 0;
382+
}
383+
}
384+
return 1;
385+
}
386+
uint64_t max_top = (uint64_t)INT64_MAX >> shift;
387+
return top <= max_top;
388+
}
389+
390+
static inline int
391+
_PyLong_CheckExactAndFitsInt64(PyObject *op)
392+
{
393+
return PyLong_CheckExact(op) && _PyLong_FitsInt64((const PyLongObject *)op);
394+
}
395+
396+
/* Extract an exact int to int64_t without raising.
397+
* Returns true and writes *out on success; returns false if out of range.
398+
* Never sets a Python exception. */
399+
static inline bool
400+
_PyLong_TryAsInt64Exact(PyLongObject *v, int64_t *out)
401+
{
402+
assert(PyLong_CheckExact((PyObject *)v));
403+
uintptr_t tag = v->long_value.lv_tag;
404+
int sign = 1 - (int)(tag & SIGN_MASK);
405+
/* Compact (0 or 1 digit): fast, branchless extraction. */
406+
if (tag < (2u << NON_SIZE_BITS)) {
407+
*out = (int64_t)(sign * (Py_ssize_t)v->long_value.ob_digit[0]);
408+
return true;
409+
}
410+
Py_ssize_t ndigits = (Py_ssize_t)(tag >> NON_SIZE_BITS);
411+
if (ndigits > _PY_LONG_MAX_DIGITS_FOR_INT64) {
412+
return false;
413+
}
414+
uint64_t abs_val = 0;
415+
#if PyLong_SHIFT == 30
416+
if (ndigits == 2) {
417+
/* Most common non-compact case on 64-bit builds. */
418+
abs_val = (uint64_t)v->long_value.ob_digit[0] |
419+
((uint64_t)v->long_value.ob_digit[1] << 30);
420+
*out = sign < 0 ? -(int64_t)abs_val : (int64_t)abs_val;
421+
return true;
422+
}
423+
#endif
424+
unsigned int shift = 0;
425+
for (Py_ssize_t i = 0; i < ndigits; i++) {
426+
uint64_t d = (uint64_t)v->long_value.ob_digit[i];
427+
if (ndigits == _PY_LONG_MAX_DIGITS_FOR_INT64 && i == ndigits - 1 &&
428+
shift != 0 && (d >> (64 - shift)) != 0)
429+
{
430+
return false;
431+
}
432+
abs_val |= d << shift;
433+
shift += PyLong_SHIFT;
434+
}
435+
if (abs_val <= (uint64_t)INT64_MAX) {
436+
*out = sign < 0 ? -(int64_t)abs_val : (int64_t)abs_val;
437+
return true;
438+
}
439+
if (sign < 0 && abs_val == (uint64_t)INT64_MAX + 1) {
440+
*out = INT64_MIN;
441+
return true;
442+
}
443+
return false;
444+
}
445+
349446
#ifdef __cplusplus
350447
}
351448
#endif

0 commit comments

Comments
 (0)