This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
intl patches (17)
- To: libc-alpha at sources dot redhat dot com
- Subject: intl patches (17)
- From: Bruno Haible <haible at ilog dot fr>
- Date: Mon, 19 Mar 2001 22:40:58 +0100 (CET)
There are four problems with the plural expression parser:
1) It doesn't support all operators. The glibc manual lists, as examples,
the following expressions
n>1
n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2
but the lexer/parser doesn't know the '<', '>', '<=', '>=' operators.
2) It treats the ? : operator as left-associative, whereas in C it is
right-associative.
3) plural.y uses <stdarg.h>. But gettext/intl must be portable to K&R C
compilers, which have only <varargs.h>. The resulting #ifdef is horrible.
And stdarg/varargs is not very efficient on many RISC processors.
4) When a malloc() call returns NULL, it executes YYABORT, which leaks
memory (namely, all the 'struct expression *' on the stack).
The attached patch does the following:
1) It adds the C comparison and logical negation operators.
2) It makes the ? : operator right-associative.
3) It passes multiple subexpressions as an array of 'struct expression *',
instead of varargs, thus achieving K&R C portability.
4) When malloc() returns NULL, it continues parsing and thus eventually
frees all the stacked subexpressions.
5) It reduces the total size of the intl/*.os files (on x86):
   text    data     bss     dec     hex filename
  17011     128      92   17231    434f intl.orig/all.os
  16993     140      92   17225    4349 intl.new/all.os
(dcigettext.os minus 64 bytes, plural.os plus 46 bytes)
2001-03-17 Bruno Haible <haible@clisp.cons.org>
* intl/gettextP.h (struct expression): Add operators lnot, less_than,
greater_than, less_or_equal, greater_or_equal. Replace args2/args3
union by a 'nargs' counter and an 'args[]' array.
* intl/plural.y: Don't include stdarg.h.
(new_exp): Take an array of arguments instead of varargs.
(new_exp_0, new_exp_1, new_exp_2, new_exp_3): New functions.
('?' ':'): Make right-associative.
(EQUOP2): New token, replaces '=' and '!'.
(CMPOP2): New token.
(ADDOP2): New token, replaces '+' and '-'.
(MULOP2): New token, replaces '*', '/' and '%'.
('!'): New token.
(exp): Add rules for CMPOP2 and '!'. Don't call YYABORT.
(start): Call YYABORT here.
(FREE_EXPRESSION): Update.
(yylex): Don't skip "\\n". Recognize comparison and '!' operators.
Update for new token symbols.
* intl/loadmsgcat.c (plvar, plone, germanic_plural,
init_germanic_plural): Update.
* intl/dcigettext.c (_nl_find_msg): Optimize for space.
(plural_eval): Recognize comparison and '!' operators. Optimize for
space.
diff -r -c3 intl/gettextP.h intl-100/gettextP.h
*** intl/gettextP.h Sat Mar 17 19:29:12 2001
--- intl/gettextP.h Sun Mar 18 00:19:31 2001
***************
*** 76,110 ****
plural form. */
struct expression
{
enum operator
{
var, /* The variable "n". */
num, /* Decimal number. */
mult, /* Multiplication. */
divide, /* Division. */
module, /* Module operation. */
plus, /* Addition. */
minus, /* Subtraction. */
equal, /* Comparision for equality. */
not_equal, /* Comparision for inequality. */
land, /* Logical AND. */
lor, /* Logical OR. */
qmop /* Question mark operator. */
} operation;
union
{
unsigned long int num; /* Number value for `num'. */
! struct
! {
! struct expression *left; /* Left expression in binary operation. */
! struct expression *right; /* Right expression in binary operation. */
! } args2;
! struct
! {
! struct expression *bexp; /* Boolean expression in ?: operation. */
! struct expression *tbranch; /* True-branch in ?: operation. */
! struct expression *fbranch; /* False-branch in ?: operation. */
! } args3;
} val;
};
--- 76,110 ----
plural form. */
struct expression
{
+ int nargs; /* Number of arguments. */
enum operator
{
+ /* Without arguments: */
var, /* The variable "n". */
num, /* Decimal number. */
+ /* Unary operators: */
+ lnot, /* Logical NOT. */
+ /* Binary operators: */
mult, /* Multiplication. */
divide, /* Division. */
module, /* Module operation. */
plus, /* Addition. */
minus, /* Subtraction. */
+ less_than, /* Comparison. */
+ greater_than, /* Comparison. */
+ less_or_equal, /* Comparison. */
+ greater_or_equal, /* Comparison. */
equal, /* Comparision for equality. */
not_equal, /* Comparision for inequality. */
land, /* Logical AND. */
lor, /* Logical OR. */
+ /* Ternary operators: */
qmop /* Question mark operator. */
} operation;
union
{
unsigned long int num; /* Number value for `num'. */
! struct expression *args[3]; /* Up to three arguments. */
} val;
};
diff -r -c3 intl/plural.y intl-100/plural.y
*** intl/plural.y Sat Mar 17 18:54:33 2001
--- intl/plural.y Sun Mar 18 00:37:08 2001
***************
*** 22,28 ****
# include <config.h>
#endif
- #include <stdarg.h>
#include <stdlib.h>
#include "gettextP.h"
--- 22,27 ----
***************
*** 45,66 ****
%union {
unsigned long int num;
struct expression *exp;
}
%{
/* Prototypes for local functions. */
! static struct expression *new_exp (enum operator op, int n, ...);
! static int yylex (YYSTYPE *lval, const char **pexp);
! static void yyerror (const char *str);
%}
! %left '?'
! %left '|'
! %left '&'
! %left '=', '!'
! %left '+', '-'
! %left '*', '/', '%'
%token <num> NUMBER
%type <exp> exp
--- 44,167 ----
%union {
unsigned long int num;
+ enum operator op;
struct expression *exp;
}
%{
/* Prototypes for local functions. */
! static struct expression *new_exp PARAMS ((int nargs, enum operator op,
! struct expression * const *args));
! static inline struct expression *new_exp_0 PARAMS ((enum operator op));
! static inline struct expression *new_exp_1 PARAMS ((enum operator op,
! struct expression *right));
! static struct expression *new_exp_2 PARAMS ((enum operator op,
! struct expression *left,
! struct expression *right));
! static inline struct expression *new_exp_3 PARAMS ((enum operator op,
! struct expression *bexp,
! struct expression *tbranch,
! struct expression *fbranch));
! static int yylex PARAMS ((YYSTYPE *lval, const char **pexp));
! static void yyerror PARAMS ((const char *str));
!
! /* Allocation of expressions. */
!
! static struct expression *
! new_exp (nargs, op, args)
! int nargs;
! enum operator op;
! struct expression * const *args;
! {
! int i;
! struct expression *newp;
!
! /* If any of the argument could not be malloc'ed, just return NULL. */
! for (i = nargs - 1; i >= 0; i--)
! if (args[i] == NULL)
! goto fail;
!
! /* Allocate a new expression. */
! newp = (struct expression *) malloc (sizeof (*newp));
! if (newp != NULL)
! {
! newp->nargs = nargs;
! newp->operation = op;
! for (i = nargs - 1; i >= 0; i--)
! newp->val.args[i] = args[i];
! return newp;
! }
!
! fail:
! for (i = nargs - 1; i >= 0; i--)
! FREE_EXPRESSION (args[i]);
!
! return NULL;
! }
!
! static inline struct expression *
! new_exp_0 (op)
! enum operator op;
! {
! return new_exp (0, op, NULL);
! }
!
! static inline struct expression *
! new_exp_1 (op, right)
! enum operator op;
! struct expression *right;
! {
! struct expression *args[1];
!
! args[0] = right;
! return new_exp (1, op, args);
! }
!
! static struct expression *
! new_exp_2 (op, left, right)
! enum operator op;
! struct expression *left;
! struct expression *right;
! {
! struct expression *args[2];
!
! args[0] = left;
! args[1] = right;
! return new_exp (2, op, args);
! }
!
! static inline struct expression *
! new_exp_3 (op, bexp, tbranch, fbranch)
! enum operator op;
! struct expression *bexp;
! struct expression *tbranch;
! struct expression *fbranch;
! {
! struct expression *args[3];
!
! args[0] = bexp;
! args[1] = tbranch;
! args[2] = fbranch;
! return new_exp (3, op, args);
! }
!
%}
! /* This declares that all operators have the same associativity and the
! precedence order as in C. See [Harbison, Steele: C, A Reference Manual].
! There is no unary minus and no bitwise operators.
! Operators with the same syntactic behaviour have been merged into a single
! token, to save space in the array generated by bison. */
! %right '?' /* ? */
! %left '|' /* || */
! %left '&' /* && */
! %left EQUOP2 /* == != */
! %left CMPOP2 /* < > <= >= */
! %left ADDOP2 /* + - */
! %left MULOP2 /* * / % */
! %right '!' /* ! */
!
! %token <op> EQUOP2 CMPOP2 ADDOP2 MULOP2
%token <num> NUMBER
%type <exp> exp
***************
*** 68,183 ****
start: exp
{
((struct parse_args *) arg)->res = $1;
}
;
exp: exp '?' exp ':' exp
{
! if (($$ = new_exp (qmop, 3, $1, $3, $5)) == NULL)
! YYABORT
}
| exp '|' exp
{
! if (($$ = new_exp (lor, 2, $1, $3)) == NULL)
! YYABORT
}
| exp '&' exp
{
! if (($$ = new_exp (land, 2, $1, $3)) == NULL)
! YYABORT
! }
! | exp '=' exp
! {
! if (($$ = new_exp (equal, 2, $1, $3)) == NULL)
! YYABORT
}
! | exp '!' exp
{
! if (($$ = new_exp (not_equal, 2, $1, $3)) == NULL)
! YYABORT
}
! | exp '+' exp
{
! if (($$ = new_exp (plus, 2, $1, $3)) == NULL)
! YYABORT
}
! | exp '-' exp
{
! if (($$ = new_exp (minus, 2, $1, $3)) == NULL)
! YYABORT
}
! | exp '*' exp
{
! if (($$ = new_exp (mult, 2, $1, $3)) == NULL)
! YYABORT
}
! | exp '/' exp
{
! if (($$ = new_exp (divide, 2, $1, $3)) == NULL)
! YYABORT
! }
! | exp '%' exp
! {
! if (($$ = new_exp (module, 2, $1, $3)) == NULL)
! YYABORT
}
| 'n'
{
! if (($$ = new_exp (var, 0)) == NULL)
! YYABORT
}
| NUMBER
{
! if (($$ = new_exp (num, 0)) == NULL)
! YYABORT;
! $$->val.num = $1
}
| '(' exp ')'
{
! $$ = $2
}
;
%%
- static struct expression *
- new_exp (enum operator op, int n, ...)
- {
- struct expression *newp = (struct expression *) calloc (1, sizeof (*newp));
- va_list va;
-
- va_start (va, n);
-
- if (newp == NULL)
- while (n-- > 0)
- FREE_EXPRESSION (va_arg (va, struct expression *));
- else
- {
- newp->operation = op;
- if (n > 0)
- {
- newp->val.args3.bexp = va_arg (va, struct expression *);
- newp->val.args3.tbranch = va_arg (va, struct expression *);
-
- if (n > 2)
- newp->val.args3.fbranch = va_arg (va, struct expression *);
-
- if (newp->val.args3.bexp == NULL
- || newp->val.args3.tbranch == NULL
- || (n > 2 && newp->val.args3.fbranch == NULL))
- {
- FREE_EXPRESSION (newp);
- newp = NULL;
- }
- }
- }
-
- va_end (va);
-
- return newp;
- }
-
void
internal_function
FREE_EXPRESSION (exp)
--- 169,229 ----
start: exp
{
+ if ($1 == NULL)
+ YYABORT;
((struct parse_args *) arg)->res = $1;
}
;
exp: exp '?' exp ':' exp
{
! $$ = new_exp_3 (qmop, $1, $3, $5);
}
| exp '|' exp
{
! $$ = new_exp_2 (lor, $1, $3);
}
| exp '&' exp
{
! $$ = new_exp_2 (land, $1, $3);
}
! | exp EQUOP2 exp
{
! $$ = new_exp_2 ($2, $1, $3);
}
! | exp CMPOP2 exp
{
! $$ = new_exp_2 ($2, $1, $3);
}
! | exp ADDOP2 exp
{
! $$ = new_exp_2 ($2, $1, $3);
}
! | exp MULOP2 exp
{
! $$ = new_exp_2 ($2, $1, $3);
}
! | '!' exp
{
! $$ = new_exp_1 (lnot, $2);
}
| 'n'
{
! $$ = new_exp_0 (var);
}
| NUMBER
{
! if (($$ = new_exp_0 (num)) != NULL)
! $$->val.num = $1;
}
| '(' exp ')'
{
! $$ = $2;
}
;
%%
void
internal_function
FREE_EXPRESSION (exp)
***************
*** 187,211 ****
return;
/* Handle the recursive case. */
! switch (exp->operation)
{
! case qmop:
! FREE_EXPRESSION (exp->val.args3.fbranch);
/* FALLTHROUGH */
-
- case mult:
- case divide:
- case module:
- case plus:
- case minus:
- case equal:
- case not_equal:
- case land:
- case lor:
- FREE_EXPRESSION (exp->val.args2.right);
- FREE_EXPRESSION (exp->val.args2.left);
- break;
-
default:
break;
}
--- 233,249 ----
return;
/* Handle the recursive case. */
! switch (exp->nargs)
{
! case 3:
! FREE_EXPRESSION (exp->val.args[2]);
! /* FALLTHROUGH */
! case 2:
! FREE_EXPRESSION (exp->val.args[1]);
! /* FALLTHROUGH */
! case 1:
! FREE_EXPRESSION (exp->val.args[0]);
/* FALLTHROUGH */
default:
break;
}
***************
*** 224,235 ****
while (1)
{
- if (exp[0] == '\\' && exp[1] == '\n')
- {
- exp += 2;
- continue;
- }
-
if (exp[0] == '\0')
{
*pexp = exp;
--- 262,267 ----
***************
*** 261,273 ****
break;
case '=':
- case '!':
if (exp[0] == '=')
! ++exp;
else
result = YYERRCODE;
break;
case '&':
case '|':
if (exp[0] == result)
--- 293,317 ----
break;
case '=':
if (exp[0] == '=')
! {
! ++exp;
! lval->op = equal;
! result = EQUOP2;
! }
else
result = YYERRCODE;
break;
+ case '!':
+ if (exp[0] == '=')
+ {
+ ++exp;
+ lval->op = not_equal;
+ result = EQUOP2;
+ }
+ break;
+
case '&':
case '|':
if (exp[0] == result)
***************
*** 276,287 ****
result = YYERRCODE;
break;
! case 'n':
case '*':
case '/':
case '%':
case '+':
case '-':
case '?':
case ':':
case '(':
--- 320,373 ----
result = YYERRCODE;
break;
! case '<':
! if (exp[0] == '=')
! {
! ++exp;
! lval->op = less_or_equal;
! }
! else
! lval->op = less_than;
! result = CMPOP2;
! break;
!
! case '>':
! if (exp[0] == '=')
! {
! ++exp;
! lval->op = greater_or_equal;
! }
! else
! lval->op = greater_than;
! result = CMPOP2;
! break;
!
case '*':
+ lval->op = mult;
+ result = MULOP2;
+ break;
+
case '/':
+ lval->op = divide;
+ result = MULOP2;
+ break;
+
case '%':
+ lval->op = module;
+ result = MULOP2;
+ break;
+
case '+':
+ lval->op = plus;
+ result = ADDOP2;
+ break;
+
case '-':
+ lval->op = minus;
+ result = ADDOP2;
+ break;
+
+ case 'n':
case '?':
case ':':
case '(':
diff -r -c3 intl/loadmsgcat.c intl-100/loadmsgcat.c
*** intl/loadmsgcat.c Sat Mar 17 19:47:06 2001
--- intl/loadmsgcat.c Sat Mar 17 23:39:33 2001
***************
*** 146,155 ****
--- 146,157 ----
form determination. It represents the expression "n != 1". */
static const struct expression plvar =
{
+ .nargs = 0,
.operation = var,
};
static const struct expression plone =
{
+ .nargs = 0,
.operation = num,
.val =
{
***************
*** 158,170 ****
};
static struct expression germanic_plural =
{
.operation = not_equal,
.val =
{
! .args2 =
{
! .left = (struct expression *) &plvar,
! .right = (struct expression *) &plone
}
}
};
--- 160,173 ----
};
static struct expression germanic_plural =
{
+ .nargs = 2,
.operation = not_equal,
.val =
{
! .args =
{
! [0] = (struct expression *) &plvar,
! [1] = (struct expression *) &plone
}
}
};
***************
*** 185,198 ****
{
if (plone.val.num == 0)
{
plvar.operation = var;
plone.operation = num;
plone.val.num = 1;
germanic_plural.operation = not_equal;
! germanic_plural.val.args2.left = &plvar;
! germanic_plural.val.args2.right = &plone;
}
}
--- 188,204 ----
{
if (plone.val.num == 0)
{
+ plvar.nargs = 0;
plvar.operation = var;
+ plone.nargs = 0;
plone.operation = num;
plone.val.num = 1;
+ germanic_plural.nargs = 2;
germanic_plural.operation = not_equal;
! germanic_plural.val.args[0] = &plvar;
! germanic_plural.val.args[1] = &plone;
}
}
diff -r -c3 intl/dcigettext.c intl-100/dcigettext.c
*** intl/dcigettext.c Sat Mar 17 19:45:29 2001
--- intl/dcigettext.c Sun Mar 18 00:26:39 2001
***************
*** 702,715 ****
nls_uint32 hash_val = hash_string (msgid);
nls_uint32 idx = hash_val % domain->hash_size;
nls_uint32 incr = 1 + (hash_val % (domain->hash_size - 2));
- nls_uint32 nstr = W (domain->must_swap, domain->hash_tab[idx]);
-
- if (nstr == 0)
- /* Hash table entry is empty. */
- return NULL;
while (1)
{
/* Compare msgid with the original string at index nstr-1.
We compare the lengths with >=, not ==, because plural entries
are represented by strings with an embedded NUL. */
--- 702,716 ----
nls_uint32 hash_val = hash_string (msgid);
nls_uint32 idx = hash_val % domain->hash_size;
nls_uint32 incr = 1 + (hash_val % (domain->hash_size - 2));
while (1)
{
+ nls_uint32 nstr = W (domain->must_swap, domain->hash_tab[idx]);
+
+ if (nstr == 0)
+ /* Hash table entry is empty. */
+ return NULL;
+
/* Compare msgid with the original string at index nstr-1.
We compare the lengths with >=, not ==, because plural entries
are represented by strings with an embedded NUL. */
***************
*** 727,737 ****
idx -= domain->hash_size - incr;
else
idx += incr;
-
- nstr = W (domain->must_swap, domain->hash_tab[idx]);
- if (nstr == 0)
- /* Hash table entry is empty. */
- return NULL;
}
/* NOTREACHED */
}
--- 728,733 ----
***************
*** 1005,1047 ****
struct expression *pexp;
unsigned long int n;
{
! switch (pexp->operation)
{
! case var:
! return n;
! case num:
! return pexp->val.num;
! case mult:
! return (plural_eval (pexp->val.args2.left, n)
! * plural_eval (pexp->val.args2.right, n));
! case divide:
! return (plural_eval (pexp->val.args2.left, n)
! / plural_eval (pexp->val.args2.right, n));
! case module:
! return (plural_eval (pexp->val.args2.left, n)
! % plural_eval (pexp->val.args2.right, n));
! case plus:
! return (plural_eval (pexp->val.args2.left, n)
! + plural_eval (pexp->val.args2.right, n));
! case minus:
! return (plural_eval (pexp->val.args2.left, n)
! - plural_eval (pexp->val.args2.right, n));
! case equal:
! return (plural_eval (pexp->val.args2.left, n)
! == plural_eval (pexp->val.args2.right, n));
! case not_equal:
! return (plural_eval (pexp->val.args2.left, n)
! != plural_eval (pexp->val.args2.right, n));
! case land:
! return (plural_eval (pexp->val.args2.left, n)
! && plural_eval (pexp->val.args2.right, n));
! case lor:
! return (plural_eval (pexp->val.args2.left, n)
! || plural_eval (pexp->val.args2.right, n));
! case qmop:
! return (plural_eval (pexp->val.args3.bexp, n)
! ? plural_eval (pexp->val.args3.tbranch, n)
! : plural_eval (pexp->val.args3.fbranch, n));
}
/* NOTREACHED */
return 0;
--- 1001,1074 ----
struct expression *pexp;
unsigned long int n;
{
! switch (pexp->nargs)
{
! case 0:
! switch (pexp->operation)
! {
! case var:
! return n;
! case num:
! return pexp->val.num;
! default:
! break;
! }
! /* NOTREACHED */
! break;
! case 1:
! {
! /* pexp->operation must be lnot. */
! unsigned long int arg = plural_eval (pexp->val.args[0], n);
! return ! arg;
! }
! case 2:
! {
! unsigned long int leftarg = plural_eval (pexp->val.args[0], n);
! if (pexp->operation == lor)
! return leftarg || plural_eval (pexp->val.args[1], n);
! else if (pexp->operation == land)
! return leftarg && plural_eval (pexp->val.args[1], n);
! else
! {
! unsigned long int rightarg = plural_eval (pexp->val.args[1], n);
!
! switch (pexp->operation)
! {
! case mult:
! return leftarg * rightarg;
! case divide:
! return leftarg / rightarg;
! case module:
! return leftarg % rightarg;
! case plus:
! return leftarg + rightarg;
! case minus:
! return leftarg - rightarg;
! case less_than:
! return leftarg < rightarg;
! case greater_than:
! return leftarg > rightarg;
! case less_or_equal:
! return leftarg <= rightarg;
! case greater_or_equal:
! return leftarg >= rightarg;
! case equal:
! return leftarg == rightarg;
! case not_equal:
! return leftarg != rightarg;
! default:
! break;
! }
! }
! /* NOTREACHED */
! break;
! }
! case 3:
! {
! /* pexp->operation must be qmop. */
! unsigned long int boolarg = plural_eval (pexp->val.args[0], n);
! return plural_eval (pexp->val.args[boolarg ? 1 : 2], n);
! }
}
/* NOTREACHED */
return 0;