This is the mail archive of the ecos-discuss@sources.redhat.com mailing list for the eCos project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: TCP/IP performance under eCos & other compilers than gnu


On Mon, Oct 01, 2001 at 09:39:20AM +0200, Andrew Lunn wrote:

> Quite a while back there was a post to the list about gcc producing
> slow CRC calculation code when using -O3. I don't remember all the
> details, so find the post in the archive. I think if you compiled the
> CRC code -O you got faster code.

[I assume you're talking about the IP checksum routine, since I
don't remember any CRC stuff in the TCP/IP network stack.] With
-O3 optimization arm-gcc 2.95.2 starts "register thrashing" and
generates code that's about 50% slower than -O0.  I switched to
an assembly language checksum, which improved throughput
considerably on my platform.  I've attached the changed files in
case you want to try it.

-- 
Grant Edwards
grante@visi.com
//==========================================================================
//
//      sys/netinet/in_cksum.c
//
//     
//
//==========================================================================
//####COPYRIGHTBEGIN####
//                                                                          
// -------------------------------------------                              
// The contents of this file are subject to the Red Hat eCos Public License 
// Version 1.1 (the "License"); you may not use this file except in         
// compliance with the License.  You may obtain a copy of the License at    
// http://www.redhat.com/                                                   
//                                                                          
// Software distributed under the License is distributed on an "AS IS"      
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See the 
// License for the specific language governing rights and limitations under 
// the License.                                                             
//                                                                          
// The Original Code is eCos - Embedded Configurable Operating System,      
// released September 30, 1998.                                             
//                                                                          
// The Initial Developer of the Original Code is Red Hat.                   
// Portions created by Red Hat are                                          
// Copyright (C) 1998, 1999, 2000 Red Hat, Inc.                             
// All Rights Reserved.                                                     
// -------------------------------------------                              
//                                                                          
//####COPYRIGHTEND####
//####BSDCOPYRIGHTBEGIN####
//
// -------------------------------------------
//
// Portions of this software may have been derived from OpenBSD or other sources,
// and are covered by the appropriate copyright disclaimers included herein.
//
// -------------------------------------------
//
//####BSDCOPYRIGHTEND####
//==========================================================================
//#####DESCRIPTIONBEGIN####
//
// Author(s):    gthomas
// Contributors: gthomas
// Date:         2000-01-10
// Purpose:      
// Description:  
//              
//
//####DESCRIPTIONEND####
//
//==========================================================================


/*	$OpenBSD: in_cksum.c,v 1.3 1997/02/24 14:06:35 niklas Exp $	*/
/*	$NetBSD: in_cksum.c,v 1.11 1996/04/08 19:55:37 jonathan Exp $	*/

/*
 * Copyright (c) 1988, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
 */

#include <sys/param.h>
#include <sys/mbuf.h>
#ifndef __ECOS
#include <sys/systm.h>
#endif
#include <netinet/in.h>

/*
 * This routine is very heavily used in the network
 * code and should be modified for each CPU to be as fast as possible.
 */

/*
 * Checksum kernel used by in_cksum().  The implementation supplied with
 * this file is hand-written ARM assembly; its header documents the
 * arguments as data pointer, byte count and initial sum, with the result
 * returned in r0.
 */
extern unsigned ipChecksum(unsigned char *p, int len, unsigned currentSum);

/* Statistics record handed to FINISH_STATS() at the end of in_cksum(). */
struct net_stats stats_in_cksum;
  
int in_cksum(register struct mbuf *m, register int len)
{
  register unsigned sum = 0;
  register int mlen;

  START_STATS();
  
  while (m && len>0)
    {
      if (len < m->m_len)
        mlen = len;
      else
        mlen = m->m_len;
      asm(" ldr r0,%0" : : "m" (m->m_data) : "r0");
      asm(" mov r1,%0" : : "r" (mlen)  : "r1");
      asm(" mov r2,%0" : : "r" (sum) : "r2");
      asm(" bl ipChecksum" : : : "r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","cc","lr");
      asm(" mov %0,r0" : "=r" (sum));
      len -= mlen;
      m = m->m_next;
    }
  
  if (len)
    diag_printf("nin_cksum: out of data\n");

  FINISH_STATS(stats_in_cksum);
  
  return (~sum & 0xffff);
}
	.section .text,"ax"

/*
 * unsigned ipChecksum(unsigned char *p, int len, unsigned currentSum)
 *
 * One's-complement (Internet checksum) sum of `len' bytes at `p', added
 * to `currentSum' and folded to 16 bits.  32-bit ARM, GNU as syntax.
 *
 * In:     r0 = data pointer, r1 = byte count, r2 = initial sum
 * Out:    r0 = 16-bit folded sum (not complemented)
 * Clobb:  r1, r2, r3, flags.  r4-r9 are used by the block loop but are
 *         saved and restored, so the routine can be called directly from
 *         C as well as from inline asm.
 * Stack:  24 bytes while the block loop runs, 4 bytes for a trailing
 *         partial word; balanced on return.
 *
 * The data is read with ldmia/ldr, so `p' is assumed to be word-aligned.
 * A trailing 1-3 byte remainder is summed as if padded with zero bytes.
 * A byte count <= 0 adds nothing and returns the folded initial sum.
 */

	.global ipChecksum
ipChecksum:
        cmp    r1,#56
        blt    .Lwords              @ short buffer: skip block loop, no saves needed
        stmfd  sp!,{r4-r9}          @ block loop needs callee-saved registers

/* Large blocks: 56 bytes (two 7-word bursts) per iteration.  The carry
 * flag is chained through all 14 additions (ldmia leaves the flags
 * alone) and added back once at the end of the block.
 */
.Lblock:
	ldmia  r0!,{r3-r9}
        adds   r2,r2,r3
        adcs   r2,r2,r4
        adcs   r2,r2,r5
        adcs   r2,r2,r6
        adcs   r2,r2,r7
        adcs   r2,r2,r8
        adcs   r2,r2,r9
	ldmia  r0!,{r3-r9}
        adcs   r2,r2,r3
        adcs   r2,r2,r4
        adcs   r2,r2,r5
        adcs   r2,r2,r6
        adcs   r2,r2,r7
        adcs   r2,r2,r8
        adcs   r2,r2,r9
        adc    r2,r2,#0             @ end-around carry
        sub    r1,r1,#56
        cmp    r1,#56
        bge    .Lblock
        ldmfd  sp!,{r4-r9}          @ only r0-r3 are used from here on

        @ 4-byte (one word) blocks; r1 = bytes remaining
.Lwords:
        cmp    r1,#4
        blt    .Ltail
        ldr    r3,[r0],#4
        adds   r2,r2,r3
        adc    r2,r2,#0             @ end-around carry
        sub    r1,r1,#4
        b      .Lwords

        @ 0..3 bytes left.  Copy them into a zeroed word on the stack and
        @ load that word back: the padding comes out right on either
        @ endianness and nothing beyond the buffer is read.
.Ltail:
        cmp    r1,#0
        ble    .Lfold               @ nothing left
        mov    r3,#0
        str    r3,[sp,#-4]!         @ push zeroed scratch word
.Ltail_copy:
        subs   r1,r1,#1             @ r1 = index of the byte to copy
        ldrb   r3,[r0,r1]
        strb   r3,[sp,r1]
        bne    .Ltail_copy          @ flags still from subs
        ldr    r3,[sp],#4           @ padded word; pop scratch
        adds   r2,r2,r3
        adc    r2,r2,#0             @ end-around carry

        @ Fold the 32-bit sum to 16 bits.
        @ adds: upper half of r2 <= upper + lower, C = carry out of that add.
        @ rsc:  r0 = (r2 >> 16) - r3 - !C = (r2 >> 16) - (-1) - 1 + C
        @          = (r2 >> 16) + C
.Lfold:
        mvn    r3,#0                @ r3 <= 0xffffffff
	adds   r2,r2,r2,lsl #16
        rsc    r0,r3,r2,lsr #16

        mov    pc,lr

        
        
        

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]