This is the mail archive of the crossgcc@sources.redhat.com mailing list for the crossgcc project.

See the CrossGCC FAQ for lots more information.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: More inline arm assembly woes... (long)

From: Bryce Schober <bryceman at dpzone dot com>
To: Dan Kegel <dank at kegel dot com>
Cc: crossgcc at sources dot redhat dot com
Date: Tue, 21 Oct 2003 16:32:39 -0700
Subject: Re: More inline arm assembly woes... (long)
References: <3F95B876.4090108@dpzone.com> <3F95BFCF.6090509@kegel.com>

Dan Kegel wrote:

Is result global? If not, gcc can legally nuke it, I think...

It might help to show your actual source.

Relevant sources attached (but not everything needed to build it).

--
Bryce Schober
Design Engineer
Dynon Avionics, Inc.
www.dynonavionics.com


#ifndef __FIXP_MUL_H__
#define __FIXP_MUL_H__


/*
	File: Multiplication Functions

	(fixp_mul.h) This set of functions implements fixed-point multiplication 
	for signed and unsigned numbers in a variety of <Qm.n Format>s.

	There are several different functions which behave in slightly different 
	ways. The function names specify this behavior.  The "32s" in the function 
	name indicates that the inputs are 32-bit signed integer types. For 
	unsigned inputs, use the "32u" functions. The "nXX" part of the function 
	name indicates the <Qm.n Format> of the inputs and output. For 8.24 fixed-
	point numbers the "_n24" functions should be used. If an output format 
	different than that of the input is needed, then the "_nX" functions 
	should be used.

	The n parameter should specify the "n" part of the <Qm.n Format> for 
	_both_ x and y for the normally expected result. More specifically, the n 
	argument specifies the right-shift amount performed on the 64-bit 
	intermediate result of the 32x32 multiply. It can, however, be used in 
	abnormal ways to re-format the resulting number to a different 
	<Qm.n Format>. For example, if x and y are in 24.8 format, but you wanted 
	to keep all precision from the multiply and get the result in 16.16 
	format, you can simply call the function with n=0 (but _make sure_ that 
	your answer won't overflow, and note that n=0 lies outside the 1<=n<=32 
	range documented under Parameters, so confirm the rounding step behaves 
	as intended before relying on it). Another example would be converting 
	degrees to radians. Given an input in degrees in 16.16 format and a 
	desired radian output in 8.24 format, simply call the function with 
	x=<input in degrees>, y=74961321 (pi/180*2^32), and n=24.
	
	Parameters:
		x -	The first fixed-point number.
		y -	The second fixed-point number.
		n - [Optional] The UNS_8 shift amount (1<=n<=32). 
			(usually the n part of the input Qm.n fixed-point format)

	Returns:
		( x * y ) >> n		(with rounding)

	Macros Used:
		- <fixp_mul_32s_nX>
		- <fixp_mul_32u_nX>

*/


#include <SMA_types.h>
#include "fixp_macros.h"




/*
	Function: fixp_mul_32s_nX

	Multiplies two *signed* m.n fixed-point numbers, using user-specified 
	shift with rounding.

	Parameters:
		x -	The first INT_32 fixed-point number.
		y -	The second INT_32 fixed-point number.
		n - The UNS_8 shift amount (1<=n<=32). 
			(usually the n part of the input Qm.n fixed-point format)

	Returns:
		(INT_32) ( x * y ) >> n    (with rounding)

	See Also:
		<Multiplication Functions>, <Qm.n Format>

*/
static inline INT_32 fixp_mul_32s_nX( INT_32 x, INT_32 y, UNS_8 n ) { 
	/*
	 * Signed 32x32 -> 64-bit multiply, then a rounded right shift by n.
	 *
	 * Operands:
	 *   %0 = tmp  low word of the 64-bit product (scratch)
	 *   %1 = res  high word of the product, then the final result
	 *   %2 = n    shift amount; rewritten in place to (32 - n)
	 *   %3 = x, %4 = y
	 *
	 * Sequence:
	 *   smull  tmp:res = x * y
	 *   movs   tmp >>= n; the last bit shifted out lands in the carry flag
	 *   rsb    n = 32 - n
	 *   adc    res = tmp + (res << n) + carry   (carry supplies the rounding)
	 *
	 * n is documented as 1..32; values outside that range are not checked.
	 *
	 * tmp and res are early-clobber ("=&r") so they never share a register
	 * with x or y.  "movs" writes the condition flags and "adc" consumes the
	 * carry, so the flags are declared clobbered ("cc"); without that the
	 * compiler may assume a condition code stays live across this statement.
	 */
	INT_32 res, tmp;
	__asm__ __volatile__ (
		"smull	%0, %1, %3, %4				\n\t"
		"movs	%0, %0, lsr %2				\n\t"
		"rsb	%2, %2, #32					\n\t"
		"adc	%1, %0, %1, lsl %2			\n\t"
		: "=&r" (tmp), "=&r" (res), "+r" (n)
		: "r" (x), "r" (y)
		: "cc"
	);
	return res;
}




/*
	Function: fixp_mul_32u_nX

	Multiplies two *unsigned* m.n fixed-point numbers, using user-specified 
	shift with rounding.

	Parameters:
		x -	The first UNS_32 fixed-point number.
		y -	The second UNS_32 fixed-point number.
		n - The UNS_8 shift amount (1<=n<=32). 
			(usually the n part of the input Qm.n fixed-point format)

	Returns:
		(UNS_32) ( x * y ) >> n    (with rounding)

	See Also:
		<Multiplication Functions>, <Qm.n Format>

*/
static inline UNS_32 fixp_mul_32u_nX( UNS_32 x, UNS_32 y, UNS_8 n ) { 
	/*
	 * Unsigned 32x32 -> 64-bit multiply, then a rounded right shift by n.
	 *
	 * Operands:
	 *   %0 = tmp  low word of the 64-bit product (scratch)
	 *   %1 = res  high word of the product, then the final result
	 *   %2 = n    shift amount; rewritten in place to (32 - n)
	 *   %3 = x, %4 = y
	 *
	 * Sequence:
	 *   umull  tmp:res = x * y
	 *   movs   tmp >>= n; the last bit shifted out lands in the carry flag
	 *   rsb    n = 32 - n
	 *   adc    res = tmp + (res << n) + carry   (carry supplies the rounding)
	 *
	 * n is documented as 1..32; values outside that range are not checked.
	 *
	 * tmp and res are early-clobber ("=&r") so they never share a register
	 * with x or y.  "movs" writes the condition flags and "adc" consumes the
	 * carry, so the flags are declared clobbered ("cc"); without that the
	 * compiler may assume a condition code stays live across this statement.
	 */
	UNS_32 res, tmp;
	__asm__ __volatile__ (
		"umull	%0, %1, %3, %4				\n\t"
		"movs	%0, %0, lsr %2				\n\t"
		"rsb	%2, %2, #32					\n\t"
		"adc	%1, %0, %1, lsl %2			\n\t"
		: "=&r" (tmp), "=&r" (res), "+r" (n)
		: "r" (x), "r" (y)
		: "cc"
	);
	return res;
}




/*
	Function: fixp_mul_32s_n8

	Multiplies two *signed* 24.8 fixed-point numbers

	Parameters:
		x -	The first INT_32 fixed-point number.
		y -	The second INT_32 fixed-point number.

	Returns:
		(INT_32) ( x * y ) >> 8    (with rounding)

	See Also:
		<Multiplication Functions>, <Qm.n Format>

*/
static inline INT_32 fixp_mul_32s_n8( INT_32 x, INT_32 y ) {
	/* Shift amount handed to the generic signed multiply: 8 fractional bits. */
	enum { FRAC_BITS = 8 };
	INT_32 product = fixp_mul_32s_nX( x, y, FRAC_BITS );

	return product;
}




/*
	Function: fixp_mul_32u_n8

	Multiplies two *unsigned* 24.8 fixed-point numbers

	Parameters:
		x -	The first UNS_32 fixed-point number.
		y -	The second UNS_32 fixed-point number.

	Returns:
		(UNS_32) ( x * y ) >> 8    (with rounding)

	See Also:
		<Multiplication Functions>, <Qm.n Format>

*/
static inline UNS_32 fixp_mul_32u_n8( UNS_32 x, UNS_32 y ) {
	/* Shift amount handed to the generic unsigned multiply: 8 fractional bits. */
	enum { FRAC_BITS = 8 };
	UNS_32 product = fixp_mul_32u_nX( x, y, FRAC_BITS );

	return product;
}




/*
	Function: fixp_mul_32s_n16

	Multiplies two *signed* 16.16 fixed-point numbers

	Parameters:
		x -	The first INT_32 fixed-point number.
		y -	The second INT_32 fixed-point number.

	Returns:
		(INT_32) ( x * y ) >> 16    (with rounding)

	See Also:
		<Multiplication Functions>, <Qm.n Format>

*/
static inline INT_32 fixp_mul_32s_n16( INT_32 x, INT_32 y ) {
	/* Shift amount handed to the generic signed multiply: 16 fractional bits. */
	enum { FRAC_BITS = 16 };
	INT_32 product = fixp_mul_32s_nX( x, y, FRAC_BITS );

	return product;
}




/*
	Function: fixp_mul_32u_n16

	Multiplies two *unsigned* 16.16 fixed-point numbers

	Parameters:
		x -	The first UNS_32 fixed-point number.
		y -	The second UNS_32 fixed-point number.

	Returns:
		(UNS_32) ( x * y ) >> 16    (with rounding)

	See Also:
		<Multiplication Functions>, <Qm.n Format>

*/
static inline UNS_32 fixp_mul_32u_n16( UNS_32 x, UNS_32 y ) {
	/* Shift amount handed to the generic unsigned multiply: 16 fractional bits. */
	enum { FRAC_BITS = 16 };
	UNS_32 product = fixp_mul_32u_nX( x, y, FRAC_BITS );

	return product;
}




/*
	Function: fixp_mul_32s_n24

	Multiplies two *signed* 8.24 fixed-point numbers

	Parameters:
		x -	The first INT_32 fixed-point number.
		y -	The second INT_32 fixed-point number.

	Returns:
		(INT_32) ( x * y ) >> 24    (with rounding)

	See Also:
		<Multiplication Functions>, <Qm.n Format>

*/
static inline INT_32 fixp_mul_32s_n24( INT_32 x, INT_32 y ) {
	/* Shift amount handed to the generic signed multiply: 24 fractional bits. */
	enum { FRAC_BITS = 24 };
	INT_32 product = fixp_mul_32s_nX( x, y, FRAC_BITS );

	return product;
}




/*
	Function: fixp_mul_32u_n24

	Multiplies two *unsigned* 8.24 fixed-point numbers

	Parameters:
		x -	The first UNS_32 fixed-point number.
		y -	The second UNS_32 fixed-point number.

	Returns:
		(UNS_32) ( x * y ) >> 24    (with rounding)

	See Also:
		<Multiplication Functions>, <Qm.n Format>

*/
static inline UNS_32 fixp_mul_32u_n24( UNS_32 x, UNS_32 y ) {
	/* Shift amount handed to the generic unsigned multiply: 24 fractional bits. */
	enum { FRAC_BITS = 24 };
	UNS_32 product = fixp_mul_32u_nX( x, y, FRAC_BITS );

	return product;
}




#endif /* __FIXP_MUL_H__ */


#ifndef __FIXP_H__
#define __FIXP_H__




/*
	File: Fixed-Point Math Library

	(fixp.h) This is the fixed-point math library for Dynon Avionics.  Its current 
	implementation is optimized ARM7 assembly, and will likely be extended to 
	include generic C implementations for debugging on i386 machines in the 
	near future.  Because of limitations between the GNU C compiler and its 
	inline assembly syntax, some of the highly optimized functions are 
	implemented as macros which use inline assembly.  These functions can be 
	found in the <Macros> file, fixp_macros.h.

	This library is loosely based on the functions offered by Intel's Graphics 
	Performance Primitives Library, found here: 
	- http://developer.intel.com/design/pca/applicationsprocessors/swsup/gpp.htm

	It may be helpful to the programmer to review the introductory text for 
	the above library found here:
	- http://www.devx.com/Intel/Article/16478

	This file is basically a placeholder for this documentation and for 
	including the entire fixed-point library at once.

	Files:
		- <General Functions>
		- <Multiplication Functions>
		- <Macros>

*/


#include "fixp_gen.h"
#include "fixp_mul.h"




/*
	Topic: Qm.n Format

	The <Fixed-Point Math Library> uses the Qm.n fixed-point 
	representation of data for its functions. The Qm.n format provides a 
	standard mechanism for representing fixed-point values. The integer binary 
	word is partitioned using an imaginary fixed point. The n-bits to the 
	right of the imaginary point comprise the fractional portion of the value 
	being represented, and these n-bits act as weights for negative powers of 
	2. The m-bits to the left of the imaginary point comprise the integer 
	portion of the value being represented, and these m-bits act as weights 
	for positive powers of 2. The overall Qm.n representation requires 
	a total of m+n bits for unsigned numbers or 1+m+n bits for signed numbers, 
	with the additional bit used to indicate the sign.

*/




#endif /* __FIXP_H__ */


/* 

	File: Test Suite

	This file is the test suite for the <Fixed-Point Math Library>.

*/


#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <SMA_types.h>
#include <DDB_globals.h>
#include <LH79520_map.h>
#include "fixp.h"
#include "fixp_test_data.h"




/* Number of passes each test loop in main() makes over the operand set. */
#define TEST_LOOPS 1
/* Element count of every test array; main() fills the operands in six
 * slices of NUM_SET_SIZE/6 elements each. */
#define NUM_SET_SIZE 1002



/* Forward declarations for the UART helpers defined after main(). */
void uart0_init();
void uart0_out_str( char *string );

/* Test data shared by the loops in main(): int_rand holds the integer
 * operands and float_rand the same values divided by 65536; the *_res
 * arrays receive the results written by the test loops. */
float float_rand[NUM_SET_SIZE], float_res[NUM_SET_SIZE];
int int_rand[NUM_SET_SIZE], int_res[NUM_SET_SIZE];



/*

	Function: main
	
	This is the main() function for the <Fixed-Point Math Library> <Test Suite>.

*/
int main(void) {

	/*
	 * Overview: fill the operand arrays, run three loops over them
	 * (baseline copy, fixed-point multiply, floating-point multiply),
	 * toggling PortC[3] after every pass (presumably so the passes can be
	 * timed externally), then dump operands and results over uart0 as
	 * comma-separated lines.  The function never returns.
	 */

	float float_a, float_b;
	INT_32 int_a, int_b, result;
	register int i, j;
	char outline[81];

	uart0_init();	// Initialize uart0

	/*	Initialize the random number arrays.  Both arrays should get numbers
	 *	between 0 and 32767.  The integers will be interpreted as 16.16
	 *	fixed-point numbers and the floats will be assigned accordingly.
	 *	The set is filled in six slices; slices 1-3 shift rand() right by 16
	 *	and slices 0, 4 and 5 shift it right by 8.
	 */
	uart0_out_str( "\rInitializing random operands...\r" );
	srand(1);							// Seed the random number generator
	for( i=0; i<6; i++ ) {
		for( j=i*NUM_SET_SIZE/6; j<(i+1)*NUM_SET_SIZE/6; j++ ) {
			switch( i ) {
				case 0:
					int_rand[j] = rand() >> 8;
					break;
				case 1:
				case 2:
				case 3:
					int_rand[j] = rand() >> 16;
					break;
				case 4:
				case 5:
					int_rand[j] = rand() >> 8;
					break;
			}
			// Same value viewed as 16.16 fixed point, i.e. scaled by 2^-16
			float_rand[j] = (float) int_rand[j] / (float) 65536;
		}
	}
	uart0_out_str( "Operand initialization complete.\r" );

	/* Initialize wiggle pins to be general-purpose output */
	IOCON->lcdmux = 0x0000;				// Set all LCD pins to be general-purpose
	GPIOC->ddr = 0xFF;					// Set data direction to output
	GPIOC->dr &= ~0x08;					// Set PortC[3] low
	GPIOC->dr ^= 0x08;					// Toggle PortC[3]


	/* Do baseline loop: same loads and store as the multiply loops, but
	 * without the multiply. */
	uart0_out_str( "Running baseline loop...\r" );
	for( i=0; i<TEST_LOOPS; i++ ) {
		for( j=0; j<NUM_SET_SIZE; j++ ) {
			int_a = int_rand[j];
			// Mirrored index; the -1 keeps j==0 inside the array, matching
			// the fixed-point and floating-point loops below.
			int_b = int_rand[NUM_SET_SIZE-j-1];
			int_res[j] = int_b;
		}
		GPIOC->dr ^= 0x08;					// Toggle PortC[3]
	}
	uart0_out_str( "Baseline loop complete.\r" );

	for( i=0; i<TEST_LOOPS*TEST_LOOPS; i++ ) { GPIOC->dr ^= 0x08;}

	/* Do fixed-point rounded multiply loop */
	uart0_out_str( "Running fixed-point rounded loop...\r" );
	for( i=0; i<TEST_LOOPS; i++ ) {
		for( j=0; j<NUM_SET_SIZE; j++ ) {
			int_a = int_rand[j];
			int_b = int_rand[NUM_SET_SIZE-j-1];
			int_res[j] = fixp_mul_32u_n16( int_a, int_b );
		}
		GPIOC->dr ^= 0x08;					// Toggle PortC[3]
	}
	uart0_out_str( "Fixed-point rounded loop complete.\r" );

	for( i=0; i<TEST_LOOPS*TEST_LOOPS; i++ ) { GPIOC->dr ^= 0x08;}

	/* Do floating-point multiply loop */
	uart0_out_str( "Running floating-point loop...\r" );
	for( i=0; i<TEST_LOOPS; i++ ) {
		for( j=0; j<NUM_SET_SIZE; j++ ) {
			float_a = float_rand[j];
			float_b = float_rand[NUM_SET_SIZE-j-1];
			float_res[j] = float_a * float_b;
		}
		GPIOC->dr ^= 0x08;					// Toggle PortC[3]
	}
	uart0_out_str( "Floating-point loop complete.\r\r" );

	/* Print inputs & outputs, one comma-separated line per element */
	for( i=0; i<NUM_SET_SIZE; i++ ) {

		// Bounded formatting: output can never run past the end of outline
		(void) snprintf(
			outline,
			sizeof outline,
			"%d,%d,%.14e,%.14e\r",
			int_rand[i],
			int_res[i],
			float_rand[i],
			float_res[i]
		);

		uart0_out_str( outline );
	}

	/* Two more back-to-back multiplies whose sum is never read afterwards. */
	int_a = int_rand[0];
	int_b = int_rand[1];
	result = fixp_mul_32u_n16( int_a, int_b );
	int_a = int_rand[2];
	int_b = int_rand[3];
	result = result + fixp_mul_32u_n16( int_a, int_b );

	/* Loop forever */
	while(1) { GPIOC->dr ^= 0x08; }

	return 0;
}




/* 
	Function: uart0_init

	This function initializes the uart on the Sharp LH79520 Dynon Display Board 
	for use in reporting the test results of the <Fixed-Point Math Library> 
	<Test Suite>.  It has no parameters or return value.

*/
/* Programs UART0 in a fixed order: control register cleared first, then
 * interrupt mask, baud-rate divisors and line control, and finally the
 * control register is written with the enable bits.  Takes no arguments
 * and returns nothing. */
void uart0_init() {

	UART0->cr = 0x0;		// Disable uart to enable settings changes
	UART0->imsc = 0x0;		// Disable all uart interrupts
	UART0->ibrd = UARTBRINT_115200;		// Integer baud rate setting
	UART0->fbrd = UARTBRFRAC_115200;	// Fraction baud rate setting
	UART0->lcr_h = UARTLCR_WLEN8 | UARTLCR_PARITY_NONE | UARTLCR_STP1;    // 8 N 1
	// Written last, after all settings above are in place.
	// NOTE(review): TXE/RXE are presumably the transmit/receive enables;
	// confirm against LH79520_map.h.
	UART0->cr = UARTCR_ENABLE | UARTCR_TXE | UARTCR_RXE;	// Enable the uart

}




/* 
	Function: uart0_out_str

	This function writes a null-terminated string to UART0. It has no return 
	value.
	
	Parameters:
		*string - Pointer to a null-terminated string.

*/
void uart0_out_str( char *string ) {

	char *cursor;

	/* Walk the string one character at a time, stopping at the NUL. */
	for ( cursor = string; *cursor != '\0'; cursor++ ) {

		/* Spin for as long as the flag register reports UARTFR_TXFF
		 * (TX fifo full). */
		while ( ( UART0->fr & UARTFR_TXFF ) != 0 ) {
			/* busy-wait */
		}

		/* Hand the current character to the UART data register. */
		UART0->dr = *cursor;
	}
}

------
Want more information?  See the CrossGCC FAQ, http://www.objsw.com/CrossGCC/
Want to unsubscribe? Send a note to crossgcc-unsubscribe@sources.redhat.com

Follow-Ups:
- Re: More inline arm assembly woes... (long)
  - From: Dan Kegel

References:
- More inline arm assembly woes... (long)
  - From: Bryce Schober
- Re: More inline arm assembly woes... (long)
  - From: Dan Kegel

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]