This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
speeding up wcwidth
- To: libc-alpha at sources dot redhat dot com
- Subject: speeding up wcwidth
- From: Bruno Haible <haible at ilog dot fr>
- Date: Sun, 1 Oct 2000 14:45:57 +0200 (CEST)
wcwidth currently does two table lookups per call: first in the iswprint
table, and only then in the width table. Here is a patch that incorporates
the iswprint result into the width table, thus moving the iswprint call from
runtime to localedef time.
It speeds up wcwidth by 27%. The LC_CTYPE table increases only by 1%.
Average time for a wcwidth (and, for comparison, iswalpha and towupper) call
on a sequence of 7160 wide characters:
                                  before       with the patch
wcwidth
  Markus Kuhn's UTF-8-demo.txt    0.20 usec    0.142 usec
  an average ASCII only text      0.20 usec    0.142 usec
iswalpha
  Markus Kuhn's UTF-8-demo.txt    0.13 usec
  an average ASCII only text      0.13 usec
towupper
  Markus Kuhn's UTF-8-demo.txt    0.13 usec
  an average ASCII only text      0.13 usec
I don't think we need to bump the LIMAGIC in localeinfo.h: the last time you
bumped it was on 2000-08-28 (after 2.1.92), and I don't think glibc has been
widely distributed since then.
2000-09-30 Bruno Haible <haible@clisp.cons.org>
* wcsmbs/wcwidth.h (__ctype32_wctype): Remove declaration.
(internal_wcwidth): Remove the tests for L'\0' and iswprint.
* locale/programs/ld-ctype.c (find_idx): If max == NULL, return
a pointer to the table entry, without extending the table.
(allocate_arrays): Set a width only for characters with 'print'
property. Set the width of L'\0' to 0.
* locale/C-ctype.c (_nl_C_LC_CTYPE_width): Use default entry 0xff
for unprintable characters. Set entry of NUL to 0.
*** glibc-20000928/wcsmbs/wcwidth.h.bak Fri Sep 1 22:17:02 2000
--- glibc-20000928/wcsmbs/wcwidth.h Sat Sep 30 13:36:36 2000
***************
*** 22,30 ****
#include <wctype.h>
#include "../wctype/wchar-lookup.h"
- /* Tables containing character property information. */
- extern const char *__ctype32_wctype[12];
-
/* Table containing width information. */
extern const char *__ctype32_width;
--- 22,27 ----
***************
*** 33,44 ****
{
unsigned char res;
! if (wc == L'\0')
! return 0;
!
! if (wctype_table_lookup (__ctype32_wctype[__ISwprint], wc) == 0)
! return -1;
!
res = wcwidth_table_lookup (__ctype32_width, wc);
return res == (unsigned char) '\xff' ? -1 : (int) res;
}
--- 30,39 ----
{
unsigned char res;
! /* The tables have been prepared in such a way that
! 1. wc == L'\0' yields res = 0,
! 2. !iswprint (wc) implies res = '\xff'. */
res = wcwidth_table_lookup (__ctype32_width, wc);
+
return res == (unsigned char) '\xff' ? -1 : (int) res;
}
*** glibc-20000928/locale/programs/ld-ctype.c.bak Fri Sep 29 01:03:36 2000
--- glibc-20000928/locale/programs/ld-ctype.c Sat Sep 30 15:01:59 2000
***************
*** 1309,1314 ****
--- 1309,1318 ----
/* We have done everything we are asked to do. */
return NULL;
+ if (max == NULL)
+ /* The caller does not want to extend the table. */
+ return (cnt >= *act ? NULL : &(*table)[cnt]);
+
if (cnt >= *act)
{
if (cnt >= *max)
***************
*** 3732,3739 ****
ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
! /* Array for width information. Because the expected width are very
! small we use only one single byte. This saves space. */
{
struct wcwidth_table t;
--- 3736,3748 ----
ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
! /* Array for width information. Because the expected widths are very
! small (never larger than 2) we use only one single byte. This
! saves space.
! We put only printable characters in the table. wcwidth is specified
! to return -1 for non-printable characters. Doing the check here
! saves a run-time check.
! But we put L'\0' in the table. This again saves a run-time check. */
{
struct wcwidth_table t;
***************
*** 3741,3747 ****
t.q = 9;
wcwidth_table_init (&t);
! /* First set all the characters of the character set to the default width. */
curs = NULL;
while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
{
--- 3750,3757 ----
t.q = 9;
wcwidth_table_init (&t);
! /* First set all the printable characters of the character set to
! the default width. */
curs = NULL;
while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
{
***************
*** 3752,3758 ****
data->name, len);
if (data->ucs4 != ILLEGAL_CHAR_VALUE)
! wcwidth_table_add (&t, data->ucs4, charmap->width_default);
}
/* Now add the explicitly specified widths. */
--- 3762,3775 ----
data->name, len);
if (data->ucs4 != ILLEGAL_CHAR_VALUE)
! {
! uint32_t *class_bits =
! find_idx (ctype, &ctype->class_collection, NULL,
! &ctype->class_collection_act, data->ucs4);
!
! if (class_bits != NULL && (*class_bits & BITw (tok_print)))
! wcwidth_table_add (&t, data->ucs4, charmap->width_default);
! }
}
/* Now add the explicitly specified widths. */
***************
*** 3792,3799 ****
strlen (seq->name));
if (wch != ILLEGAL_CHAR_VALUE)
! /* Store the value. */
! wcwidth_table_add (&t, wch, charmap->width_rules[cnt].width);
/* "Increment" the bytes sequence. */
inner = nbytes - 1;
--- 3809,3824 ----
strlen (seq->name));
if (wch != ILLEGAL_CHAR_VALUE)
! {
! /* Store the value. */
! uint32_t *class_bits =
! find_idx (ctype, &ctype->class_collection, NULL,
! &ctype->class_collection_act, wch);
!
! if (class_bits != NULL && (*class_bits & BITw (tok_print)))
! wcwidth_table_add (&t, wch,
! charmap->width_rules[cnt].width);
! }
/* "Increment" the bytes sequence. */
inner = nbytes - 1;
***************
*** 3819,3824 ****
--- 3844,3852 ----
}
}
}
+
+ /* Set the width of L'\0' to 0. */
+ wcwidth_table_add (&t, 0, 0);
wcwidth_table_finalize (&t);
*** glibc-20000928/locale/C-ctype.c.bak Fri Sep 1 22:16:59 2000
--- glibc-20000928/locale/C-ctype.c Sat Sep 30 16:06:25 2000
***************
*** 504,521 ****
{
uint32_t header[5];
uint32_t level1[1];
! uint32_t level2[1];
! uint8_t level3[1];
}
_nl_C_LC_CTYPE_width =
{
! { 7, 1, 0, 0, 0 },
/* 1st-level table */
{ 6 * sizeof (uint32_t) },
/* 2nd-level table */
! { 7 * sizeof (uint32_t) },
/* 3rd-level table */
! { 1 }
};
/* Number of fields with fixed meanings, starting at 0. */
--- 504,530 ----
{
uint32_t header[5];
uint32_t level1[1];
! uint32_t level2[8];
! int8_t level3[33];
}
_nl_C_LC_CTYPE_width =
{
! { 7, 1, 4, 7, 15 },
/* 1st-level table */
{ 6 * sizeof (uint32_t) },
/* 2nd-level table */
! {
! 14 * sizeof (uint32_t) + 0, 0,
! 14 * sizeof (uint32_t) + 16, 14 * sizeof (uint32_t) + 16,
! 14 * sizeof (uint32_t) + 16, 14 * sizeof (uint32_t) + 16,
! 14 * sizeof (uint32_t) + 16, 14 * sizeof (uint32_t) + 17
! },
/* 3rd-level table */
! {
! 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
! -1
! }
};
/* Number of fields with fixed meanings, starting at 0. */