This is the mail archive of the cygwin-cvs@cygwin.com mailing list for the Cygwin project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[newlib-cygwin] Cygwin: Add IUTF8 termios iflag


https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=095cac4b8df4a6d8e5cce37e8fef227aab43bf82

commit 095cac4b8df4a6d8e5cce37e8fef227aab43bf82
Author: Corinna Vinschen <corinna@vinschen.de>
Date:   Tue Jan 31 15:36:24 2017 +0100

    Cygwin: Add IUTF8 termios iflag
    
    The termios code doesn't handle erasing of multibyte characters
    in canonical mode; it always erases a single byte.  When entering
    a multibyte character and then pressing VERASE, the input ends up
    with an invalid character.
    
    Following Linux, we now introduce the IUTF8 input flag, set by
    default.  When this flag is set, VERASE or VWERASE will check
    if the just-erased input byte is a UTF-8 continuation byte.  If
    so, it erases another byte and checks again until the entire
    UTF-8 character has been removed from the input buffer.
    
    Note that this (just as on Linux) does NOT work with arbitrary
    multibyte codesets.  This only works with UTF-8.
    
    For a discussion of what happens, see
    https://cygwin.com/ml/cygwin/2017-01/msg00299.html
    
    Sidenote: The eat_readahead function is now a member of fhandler_termios,
    not fhandler_base.  That's necessary to get access to the terminal's
    termios flags.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/fhandler.cc           | 20 --------------------
 winsup/cygwin/fhandler.h            |  4 ++--
 winsup/cygwin/fhandler_termios.cc   | 29 ++++++++++++++++++++++++++++-
 winsup/cygwin/include/sys/termios.h |  1 +
 4 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/winsup/cygwin/fhandler.cc b/winsup/cygwin/fhandler.cc
index a8fe3b6..aa4cd69 100644
--- a/winsup/cygwin/fhandler.cc
+++ b/winsup/cygwin/fhandler.cc
@@ -111,26 +111,6 @@ fhandler_base::set_readahead_valid (int val, int ch)
 }
 
 int
-fhandler_base::eat_readahead (int n)
-{
-  int oralen = ralen;
-  if (n < 0)
-    n = ralen;
-  if (n > 0 && ralen)
-    {
-      if ((int) (ralen -= n) < 0)
-	ralen = 0;
-
-      if (raixget >= ralen)
-	raixget = raixput = ralen = 0;
-      else if (raixput > ralen)
-	raixput = ralen;
-    }
-
-  return oralen;
-}
-
-int
 fhandler_base::get_readahead_into_buffer (char *buf, size_t buflen)
 {
   int ch;
diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h
index c7db8f8..14f7680 100644
--- a/winsup/cygwin/fhandler.h
+++ b/winsup/cygwin/fhandler.h
@@ -288,8 +288,6 @@ class fhandler_base
   int get_readahead ();
   int peek_readahead (int queryput = 0);
 
-  int eat_readahead (int n);
-
   void set_readahead_valid (int val, int ch = -1);
 
   int get_readahead_into_buffer (char *buf, size_t buflen);
@@ -1217,6 +1215,8 @@ class fhandler_termios: public fhandler_base
   int ioctl (int, void *);
   tty_min *_tc;
   tty *get_ttyp () {return (tty *) tc ();}
+  int eat_readahead (int n);
+
  public:
   tty_min*& tc () {return _tc;}
   fhandler_termios () :
diff --git a/winsup/cygwin/fhandler_termios.cc b/winsup/cygwin/fhandler_termios.cc
index dc8a19b..19fcfc9 100644
--- a/winsup/cygwin/fhandler_termios.cc
+++ b/winsup/cygwin/fhandler_termios.cc
@@ -30,7 +30,7 @@ fhandler_termios::tcinit (bool is_pty_master)
 
   if (is_pty_master || !tc ()->initialized ())
     {
-      tc ()->ti.c_iflag = BRKINT | ICRNL | IXON;
+      tc ()->ti.c_iflag = BRKINT | ICRNL | IXON | IUTF8;
       tc ()->ti.c_oflag = OPOST | ONLCR;
       tc ()->ti.c_cflag = B38400 | CS8 | CREAD;
       tc ()->ti.c_lflag = ISIG | ICANON | ECHO | IEXTEN;
@@ -257,6 +257,33 @@ fhandler_termios::bg_check (int sig, bool dontsignal)
 
 #define set_input_done(x) input_done = input_done || (x)
 
+int
+fhandler_termios::eat_readahead (int n)
+{
+  int oralen = ralen;
+  if (n < 0)
+    n = ralen;
+  if (n > 0 && ralen > 0)
+    {
+      if ((int) (ralen -= n) < 0)
+	ralen = 0;
+      /* If IUTF8 is set, the terminal is in UTF-8 mode.  If so, we erase
+	 a complete UTF-8 multibyte sequence on VERASE/VWERASE.  Otherwise,
+	 if we only erase a single byte, invalid unicode chars are left in
+	 the input. */
+      if (tc ()->ti.c_iflag & IUTF8)
+	while (ralen > 0 && ((unsigned char) rabuf[ralen] & 0xc0) == 0x80)
+	  --ralen;
+
+      if (raixget >= ralen)
+	raixget = raixput = ralen = 0;
+      else if (raixput > ralen)
+	raixput = ralen;
+    }
+
+  return oralen;
+}
+
 inline void
 fhandler_termios::echo_erase (int force)
 {
diff --git a/winsup/cygwin/include/sys/termios.h b/winsup/cygwin/include/sys/termios.h
index 38de087..17e8d83 100644
--- a/winsup/cygwin/include/sys/termios.h
+++ b/winsup/cygwin/include/sys/termios.h
@@ -115,6 +115,7 @@ POSIX commands */
 #define IUCLC	0x04000
 #define IXANY	0x08000
 #define PARMRK	0x10000
+#define IUTF8	0x20000
 
 /* oflag bits */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]