This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

speeding up wcwidth



wcwidth currently does two table lookups per call: first in the iswprint
table, and only then in the width table. Here is a patch to incorporate the
iswprint result in the width table, thus moving the iswprint call from
runtime to localedef time.

It speeds up wcwidth by 27%. The LC_CTYPE table increases only by 1%.

Average time for a wcwidth (and, for comparison, iswalpha and towupper) call
on a sequence of 7160 wide characters:

                                    before     with the patch
wcwidth
  Markus Kuhn's UTF-8-demo.txt     0.20 usec    0.142 usec
  an average ASCII only text       0.20 usec    0.142 usec
iswalpha
  Markus Kuhn's UTF-8-demo.txt            0.13 usec
  an average ASCII only text              0.13 usec
towupper
  Markus Kuhn's UTF-8-demo.txt            0.13 usec
  an average ASCII only text              0.13 usec

I don't think we need to bump the LIMAGIC in localeinfo.h: the last time you
bumped it was on 2000-08-28 (after 2.1.92), and I don't think glibc has been
widely distributed since then.


2000-09-30  Bruno Haible  <haible@clisp.cons.org>

	* wcsmbs/wcwidth.h (__ctype32_wctype): Remove declaration.
	(internal_wcwidth): Remove the tests for L'\0' and iswprint.
	* locale/programs/ld-ctype.c (find_idx): If max == NULL, return
	a pointer to the table entry, without extending the table.
	(allocate_arrays): Set a width only for characters with 'print'
	property. Set the width of L'\0' to 0.
	* locale/C-ctype.c (_nl_C_LC_CTYPE_width): Use default entry 0xff
	for unprintable characters. Set entry of NUL to 0.

*** glibc-20000928/wcsmbs/wcwidth.h.bak	Fri Sep  1 22:17:02 2000
--- glibc-20000928/wcsmbs/wcwidth.h	Sat Sep 30 13:36:36 2000
***************
*** 22,30 ****
  #include <wctype.h>
  #include "../wctype/wchar-lookup.h"
  
- /* Tables containing character property information.  */
- extern const char *__ctype32_wctype[12];
- 
  /* Table containing width information.  */
  extern const char *__ctype32_width;
  
--- 22,27 ----
***************
*** 33,44 ****
  {
    unsigned char res;
  
!   if (wc == L'\0')
!     return 0;
! 
!   if (wctype_table_lookup (__ctype32_wctype[__ISwprint], wc) == 0)
!     return -1;
! 
    res = wcwidth_table_lookup (__ctype32_width, wc);
    return res == (unsigned char) '\xff' ? -1 : (int) res;
  }
--- 30,39 ----
  {
    unsigned char res;
  
!   /* The tables have been prepared in such a way that
!      1. wc == L'\0' yields res = 0,
!      2. !iswprint (wc) implies res = '\xff'.  */
    res = wcwidth_table_lookup (__ctype32_width, wc);
+ 
    return res == (unsigned char) '\xff' ? -1 : (int) res;
  }
*** glibc-20000928/locale/programs/ld-ctype.c.bak	Fri Sep 29 01:03:36 2000
--- glibc-20000928/locale/programs/ld-ctype.c	Sat Sep 30 15:01:59 2000
***************
*** 1309,1314 ****
--- 1309,1318 ----
      /* We have done everything we are asked to do.  */
      return NULL;
  
+   if (max == NULL)
+     /* The caller does not want to extend the table.  */
+     return (cnt >= *act ? NULL : &(*table)[cnt]);
+ 
    if (cnt >= *act)
      {
        if (cnt >= *max)
***************
*** 3732,3739 ****
    ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
    ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
  
!   /* Array for width information.  Because the expected width are very
!      small we use only one single byte.  This saves space.  */
    {
      struct wcwidth_table t;
  
--- 3736,3748 ----
    ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
    ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
  
!   /* Array for width information.  Because the expected widths are very
!      small (never larger than 2) we use only one single byte.  This
!      saves space.
!      We put only printable characters in the table.  wcwidth is specified
!      to return -1 for non-printable characters.  Doing the check here
!      saves a run-time check.
!      But we put L'\0' in the table.  This again saves a run-time check.  */
    {
      struct wcwidth_table t;
  
***************
*** 3741,3747 ****
      t.q = 9;
      wcwidth_table_init (&t);
  
!     /* First set all the characters of the character set to the default width.  */
      curs = NULL;
      while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
        {
--- 3750,3757 ----
      t.q = 9;
      wcwidth_table_init (&t);
  
!     /* First set all the printable characters of the character set to
!        the default width.  */
      curs = NULL;
      while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
        {
***************
*** 3752,3758 ****
  					      data->name, len);
  
  	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
! 	  wcwidth_table_add (&t, data->ucs4, charmap->width_default);
        }
  
      /* Now add the explicitly specified widths.  */
--- 3762,3775 ----
  					      data->name, len);
  
  	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
! 	  {
! 	    uint32_t *class_bits =
! 	      find_idx (ctype, &ctype->class_collection, NULL,
! 			&ctype->class_collection_act, data->ucs4);
! 
! 	    if (class_bits != NULL && (*class_bits & BITw (tok_print)))
! 	      wcwidth_table_add (&t, data->ucs4, charmap->width_default);
! 	  }
        }
  
      /* Now add the explicitly specified widths.  */
***************
*** 3792,3799 ****
  					       strlen (seq->name));
  
  		if (wch != ILLEGAL_CHAR_VALUE)
! 		  /* Store the value.  */
! 		  wcwidth_table_add (&t, wch, charmap->width_rules[cnt].width);
  
  		/* "Increment" the bytes sequence.  */
  		inner = nbytes - 1;
--- 3809,3824 ----
  					       strlen (seq->name));
  
  		if (wch != ILLEGAL_CHAR_VALUE)
! 		  {
! 		    /* Store the value.  */
! 		    uint32_t *class_bits =
! 		      find_idx (ctype, &ctype->class_collection, NULL,
! 				&ctype->class_collection_act, wch);
! 
! 		    if (class_bits != NULL && (*class_bits & BITw (tok_print)))
! 		      wcwidth_table_add (&t, wch,
! 					 charmap->width_rules[cnt].width);
! 		  }
  
  		/* "Increment" the bytes sequence.  */
  		inner = nbytes - 1;
***************
*** 3819,3824 ****
--- 3844,3852 ----
  	      }
  	  }
        }
+ 
+     /* Set the width of L'\0' to 0.  */
+     wcwidth_table_add (&t, 0, 0);
  
      wcwidth_table_finalize (&t);
  
*** glibc-20000928/locale/C-ctype.c.bak	Fri Sep  1 22:16:59 2000
--- glibc-20000928/locale/C-ctype.c	Sat Sep 30 16:06:25 2000
***************
*** 504,521 ****
  {
    uint32_t header[5];
    uint32_t level1[1];
!   uint32_t level2[1];
!   uint8_t level3[1];
  }
  _nl_C_LC_CTYPE_width =
  {
!   { 7, 1, 0, 0, 0 },
    /* 1st-level table */
    { 6 * sizeof (uint32_t) },
    /* 2nd-level table */
!   { 7 * sizeof (uint32_t) },
    /* 3rd-level table */
!   { 1 }
  };
  
  /* Number of fields with fixed meanings, starting at 0.  */
--- 504,530 ----
  {
    uint32_t header[5];
    uint32_t level1[1];
!   uint32_t level2[8];
!   int8_t level3[33];
  }
  _nl_C_LC_CTYPE_width =
  {
!   { 7, 1, 4, 7, 15 },
    /* 1st-level table */
    { 6 * sizeof (uint32_t) },
    /* 2nd-level table */
!   {
!     14 * sizeof (uint32_t) +  0, 0,
!     14 * sizeof (uint32_t) + 16, 14 * sizeof (uint32_t) + 16,
!     14 * sizeof (uint32_t) + 16, 14 * sizeof (uint32_t) + 16,
!     14 * sizeof (uint32_t) + 16, 14 * sizeof (uint32_t) + 17
!   },
    /* 3rd-level table */
!   {
!      0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
!      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
!      -1
!   }
  };
  
  /* Number of fields with fixed meanings, starting at 0.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]