Commit d4222cfe by Jeffrey Redondo

Upgrade firmware

parents
Showing with 5053 additions and 0 deletions
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_abs_f32.c
*
* Description: Vector absolute value.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
#include <math.h>
/**
* @ingroup groupMath
*/
/**
* @defgroup BasicAbs Vector Absolute Value
*
* Computes the absolute value of a vector on an element-by-element basis.
*
* <pre>
* pDst[n] = abs(pSrc[n]), 0 <= n < blockSize.
* </pre>
*
* The functions support in-place computation allowing the source and
* destination pointers to reference the same memory buffer.
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
* @addtogroup BasicAbs
* @{
*/
/**
* @brief Floating-point vector absolute value.
* @param[in] *pSrc points to the input buffer
* @param[out] *pDst points to the output buffer
* @param[in] blockSize number of samples in each vector
* @return none.
*/
void arm_abs_f32(
float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
float32_t in1, in2, in3, in4; /* temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = |A| */
/* Calculate absolute and then store the results in the destination buffer. */
/* read sample from source */
in1 = *pSrc;
in2 = *(pSrc + 1);
in3 = *(pSrc + 2);
/* find absolute value */
in1 = fabsf(in1);
/* read sample from source */
in4 = *(pSrc + 3);
/* find absolute value */
in2 = fabsf(in2);
/* read sample from source */
*pDst = in1;
/* find absolute value */
in3 = fabsf(in3);
/* find absolute value */
in4 = fabsf(in4);
/* store result to destination */
*(pDst + 1) = in2;
/* store result to destination */
*(pDst + 2) = in3;
/* store result to destination */
*(pDst + 3) = in4;
/* Update source pointer to process next sampels */
pSrc += 4u;
/* Update destination pointer to process next sampels */
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = |A| */
/* Calculate absolute and then store the results in the destination buffer. */
*pDst++ = fabsf(*pSrc++);
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of BasicAbs group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_abs_q15.c
*
* Description: Q15 vector absolute value.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicAbs
* @{
*/
/**
* @brief Q15 vector absolute value.
* @param[in] *pSrc points to the input buffer
* @param[out] *pDst points to the output buffer
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
*/
void arm_abs_q15(
q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
__SIMD32_TYPE *simd;
/* Run the below code for Cortex-M4 and Cortex-M3 */
q15_t in1; /* Input value1 */
q15_t in2; /* Input value2 */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
simd = __SIMD32_CONST(pDst);
while(blkCnt > 0u)
{
/* C = |A| */
/* Read two inputs */
in1 = *pSrc++;
in2 = *pSrc++;
/* Store the Absolute result in the destination buffer by packing the two values, in a single cycle */
#ifndef ARM_MATH_BIG_ENDIAN
*simd++ =
__PKHBT(((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)),
((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)), 16);
#else
*simd++ =
__PKHBT(((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)),
((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)), 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
in1 = *pSrc++;
in2 = *pSrc++;
#ifndef ARM_MATH_BIG_ENDIAN
*simd++ =
__PKHBT(((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)),
((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)), 16);
#else
*simd++ =
__PKHBT(((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)),
((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)), 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Decrement the loop counter */
blkCnt--;
}
pDst = (q15_t *)simd;
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = |A| */
/* Read the input */
in1 = *pSrc++;
/* Calculate absolute value of input and then store the result in the destination buffer. */
*pDst++ = (in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
q15_t in; /* Temporary input variable */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = |A| */
/* Read the input */
in = *pSrc++;
/* Calculate absolute value of input and then store the result in the destination buffer. */
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of BasicAbs group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_abs_q31.c
*
* Description: Q31 vector absolute value.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicAbs
* @{
*/
/**
* @brief Q31 vector absolute value.
* @param[in] *pSrc points to the input buffer
* @param[out] *pDst points to the output buffer
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
void arm_abs_q31(
q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
q31_t in; /* Input value */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t in1, in2, in3, in4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
*pDst++ = (in1 > 0) ? in1 : (q31_t)__QSUB(0, in1);
*pDst++ = (in2 > 0) ? in2 : (q31_t)__QSUB(0, in2);
*pDst++ = (in3 > 0) ? in3 : (q31_t)__QSUB(0, in3);
*pDst++ = (in4 > 0) ? in4 : (q31_t)__QSUB(0, in4);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = |A| */
/* Calculate absolute value of the input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
in = *pSrc++;
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of BasicAbs group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_abs_q7.c
*
* Description: Q7 vector absolute value.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicAbs
* @{
*/
/**
* @brief Q7 vector absolute value.
* @param[in] *pSrc points to the input buffer
* @param[out] *pDst points to the output buffer
* @param[in] blockSize number of samples in each vector
* @return none.
*
* \par Conditions for optimum performance
* Input and output buffers should be aligned by 32-bit
*
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
*/
void arm_abs_q7(
q7_t * pSrc,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
q7_t in; /* Input value1 */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t in1, in2, in3, in4; /* temporary input variables */
q31_t out1, out2, out3, out4; /* temporary output variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = |A| */
/* Read inputs */
in1 = (q31_t) * pSrc;
in2 = (q31_t) * (pSrc + 1);
in3 = (q31_t) * (pSrc + 2);
/* find absolute value */
out1 = (in1 > 0) ? in1 : (q31_t)__QSUB8(0, in1);
/* read input */
in4 = (q31_t) * (pSrc + 3);
/* find absolute value */
out2 = (in2 > 0) ? in2 : (q31_t)__QSUB8(0, in2);
/* store result to destination */
*pDst = (q7_t) out1;
/* find absolute value */
out3 = (in3 > 0) ? in3 : (q31_t)__QSUB8(0, in3);
/* find absolute value */
out4 = (in4 > 0) ? in4 : (q31_t)__QSUB8(0, in4);
/* store result to destination */
*(pDst + 1) = (q7_t) out2;
/* store result to destination */
*(pDst + 2) = (q7_t) out3;
/* store result to destination */
*(pDst + 3) = (q7_t) out4;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
blkCnt = blockSize;
#endif /* #define ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = |A| */
/* Read the input */
in = *pSrc++;
/* Store the Absolute result in the destination buffer */
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? 0x7f : -in);
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of BasicAbs group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_add_f32.c
*
* Description: Floating-point vector addition.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @defgroup BasicAdd Vector Addition
*
* Element-by-element addition of two vectors.
*
* <pre>
* pDst[n] = pSrcA[n] + pSrcB[n], 0 <= n < blockSize.
* </pre>
*
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
* @addtogroup BasicAdd
* @{
*/
/**
* @brief Floating-point vector addition.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*/
void arm_add_f32(
float32_t * pSrcA,
float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
float32_t inA1, inA2, inA3, inA4; /* temporary input variabels */
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
/* read four inputs from sourceA and four inputs from sourceB */
inA1 = *pSrcA;
inB1 = *pSrcB;
inA2 = *(pSrcA + 1);
inB2 = *(pSrcB + 1);
inA3 = *(pSrcA + 2);
inB3 = *(pSrcB + 2);
inA4 = *(pSrcA + 3);
inB4 = *(pSrcB + 3);
/* C = A + B */
/* add and store result to destination */
*pDst = inA1 + inB1;
*(pDst + 1) = inA2 + inB2;
*(pDst + 2) = inA3 + inB3;
*(pDst + 3) = inA4 + inB4;
/* update pointers to process next samples */
pSrcA += 4u;
pSrcB += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
*pDst++ = (*pSrcA++) + (*pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of BasicAdd group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_add_q15.c
*
* Description: Q15 vector addition
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicAdd
* @{
*/
/**
* @brief Q15 vector addition.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
*/
void arm_add_q15(
q15_t * pSrcA,
q15_t * pSrcB,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t inA1, inA2, inB1, inB2;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
inA1 = *__SIMD32(pSrcA)++;
inA2 = *__SIMD32(pSrcA)++;
inB1 = *__SIMD32(pSrcB)++;
inB2 = *__SIMD32(pSrcB)++;
*__SIMD32(pDst)++ = __QADD16(inA1, inB1);
*__SIMD32(pDst)++ = __QADD16(inA2, inB2);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
*pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ + *pSrcB++), 16);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of BasicAdd group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_add_q31.c
*
* Description: Q31 vector addition.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicAdd
* @{
*/
/**
* @brief Q31 vector addition.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
*/
void arm_add_q31(
q31_t * pSrcA,
q31_t * pSrcB,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t inA1, inA2, inA3, inA4;
q31_t inB1, inB2, inB3, inB4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
inA1 = *pSrcA++;
inA2 = *pSrcA++;
inB1 = *pSrcB++;
inB2 = *pSrcB++;
inA3 = *pSrcA++;
inA4 = *pSrcA++;
inB3 = *pSrcB++;
inB4 = *pSrcB++;
*pDst++ = __QADD(inA1, inB1);
*pDst++ = __QADD(inA2, inB2);
*pDst++ = __QADD(inA3, inB3);
*pDst++ = __QADD(inA4, inB4);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ + *pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of BasicAdd group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_add_q7.c
*
* Description: Q7 vector addition.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicAdd
* @{
*/
/**
* @brief Q7 vector addition.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
*/
void arm_add_q7(
q7_t * pSrcA,
q7_t * pSrcB,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
*pDst++ = (q7_t) __SSAT(*pSrcA++ + *pSrcB++, 8);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ + *pSrcB++, 8);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of BasicAdd group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_dot_prod_f32.c
*
* Description: Floating-point dot product.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @defgroup dot_prod Vector Dot Product
*
* Computes the dot product of two vectors.
* The vectors are multiplied element-by-element and then summed.
*
* <pre>
* sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
* </pre>
*
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
* @addtogroup dot_prod
* @{
*/
/**
* @brief Dot product of floating-point vectors.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[out] *result output result returned here
* @return none.
*/
void arm_dot_prod_f32(
float32_t * pSrcA,
float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
float32_t sum = 0.0f; /* Temporary result storage */
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer */
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
sum += (*pSrcA++) * (*pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
sum += (*pSrcA++) * (*pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
/* Store the result back in the destination buffer */
*result = sum;
}
/**
* @} end of dot_prod group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q15.c
*
* Description: Q15 dot product.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup dot_prod
* @{
*/
/**
* @brief Dot product of Q15 vectors.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[out] *result output result returned here
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
* results are added to a 64-bit accumulator in 34.30 format.
* Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
* there is no risk of overflow.
* The return result is in 34.30 format.
*/
void arm_dot_prod_q15(
q15_t * pSrcA,
q15_t * pSrcB,
uint32_t blockSize,
q63_t * result)
{
q63_t sum = 0; /* Temporary result storage */
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the results in a temporary buffer. */
sum = __SMLALD(*pSrcA++, *pSrcB++, sum);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the results in a temporary buffer. */
sum += (q63_t) ((q31_t) * pSrcA++ * *pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
/* Store the result in the destination buffer in 34.30 format */
*result = sum;
}
/**
* @} end of dot_prod group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q31.c
*
* Description: Q31 dot product.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup dot_prod
* @{
*/
/**
* @brief Dot product of Q31 vectors.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[out] *result output result returned here
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
* are truncated to 2.48 format by discarding the lower 14 bits.
* The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
* There are 15 guard bits in the accumulator and there is no risk of overflow as long as
* the length of the vectors is less than 2^16 elements.
* The return result is in 16.48 format.
*/
void arm_dot_prod_q31(
q31_t * pSrcA,
q31_t * pSrcB,
uint32_t blockSize,
q63_t * result)
{
q63_t sum = 0; /* Temporary result storage */
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t inA1, inA2, inA3, inA4;
q31_t inB1, inB2, inB3, inB4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
inA1 = *pSrcA++;
inA2 = *pSrcA++;
inA3 = *pSrcA++;
inA4 = *pSrcA++;
inB1 = *pSrcB++;
inB2 = *pSrcB++;
inB3 = *pSrcB++;
inB4 = *pSrcB++;
sum += ((q63_t) inA1 * inB1) >> 14u;
sum += ((q63_t) inA2 * inB2) >> 14u;
sum += ((q63_t) inA3 * inB3) >> 14u;
sum += ((q63_t) inA4 * inB4) >> 14u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
/* Decrement the loop counter */
blkCnt--;
}
/* Store the result in the destination buffer in 16.48 format */
*result = sum;
}
/**
* @} end of dot_prod group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_dot_prod_q7.c
*
* Description: Q7 dot product.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup dot_prod
* @{
*/
/**
* @brief Dot product of Q7 vectors.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[out] *result output result returned here
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
* results are added to an accumulator in 18.14 format.
* Nonsaturating additions are used and there is no danger of wrap around as long as
* the vectors are less than 2^18 elements long.
* The return result is in 18.14 format.
*/
void arm_dot_prod_q7(
q7_t * pSrcA,
q7_t * pSrcB,
uint32_t blockSize,
q31_t * result)
{
uint32_t blkCnt; /* loop counter */
q31_t sum = 0; /* Temporary variables to store output */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t input1, input2; /* Temporary variables to store input */
q31_t inA1, inA2, inB1, inB2; /* Temporary variables to store input */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* read 4 samples at a time from sourceA */
input1 = *__SIMD32(pSrcA)++;
/* read 4 samples at a time from sourceB */
input2 = *__SIMD32(pSrcB)++;
/* extract two q7_t samples to q15_t samples */
inA1 = __SXTB16(__ROR(input1, 8));
/* extract reminaing two samples */
inA2 = __SXTB16(input1);
/* extract two q7_t samples to q15_t samples */
inB1 = __SXTB16(__ROR(input2, 8));
/* extract reminaing two samples */
inB2 = __SXTB16(input2);
/* multiply and accumulate two samples at a time */
sum = __SMLAD(inA1, inB1, sum);
sum = __SMLAD(inA2, inB2, sum);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Dot product and then store the results in a temporary buffer. */
sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Dot product and then store the results in a temporary buffer. */
sum += (q31_t) ((q15_t) * pSrcA++ * *pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
/* Store the result in the destination buffer in 18.14 format */
*result = sum;
}
/**
* @} end of dot_prod group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mult_f32.c
*
* Description: Floating-point vector multiplication.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @defgroup BasicMult Vector Multiplication
*
* Element-by-element multiplication of two vectors.
*
* <pre>
* pDst[n] = pSrcA[n] * pSrcB[n], 0 <= n < blockSize.
* </pre>
*
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
* @addtogroup BasicMult
* @{
*/
/**
* @brief Floating-point vector multiplication.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*/
void arm_mult_f32(
float32_t * pSrcA,
float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counters */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
float32_t inA1, inA2, inA3, inA4; /* temporary input variables */
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
float32_t out1, out2, out3, out4; /* temporary output variables */
/* loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A * B */
/* Multiply the inputs and store the results in output buffer */
/* read sample from sourceA */
inA1 = *pSrcA;
/* read sample from sourceB */
inB1 = *pSrcB;
/* read sample from sourceA */
inA2 = *(pSrcA + 1);
/* read sample from sourceB */
inB2 = *(pSrcB + 1);
/* out = sourceA * sourceB */
out1 = inA1 * inB1;
/* read sample from sourceA */
inA3 = *(pSrcA + 2);
/* read sample from sourceB */
inB3 = *(pSrcB + 2);
/* out = sourceA * sourceB */
out2 = inA2 * inB2;
/* read sample from sourceA */
inA4 = *(pSrcA + 3);
/* store result to destination buffer */
*pDst = out1;
/* read sample from sourceB */
inB4 = *(pSrcB + 3);
/* out = sourceA * sourceB */
out3 = inA3 * inB3;
/* store result to destination buffer */
*(pDst + 1) = out2;
/* out = sourceA * sourceB */
out4 = inA4 * inB4;
/* store result to destination buffer */
*(pDst + 2) = out3;
/* store result to destination buffer */
*(pDst + 3) = out4;
/* update pointers to process next samples */
pSrcA += 4u;
pSrcB += 4u;
pDst += 4u;
/* Decrement the blockSize loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A * B */
/* Multiply the inputs and store the results in output buffer */
*pDst++ = (*pSrcA++) * (*pSrcB++);
/* Decrement the blockSize loop counter */
blkCnt--;
}
}
/**
* @} end of BasicMult group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. October 2015
* $Revision: V.1.4.5 a
*
* Project: CMSIS DSP Library
* Title: arm_mult_q15.c
*
* Description: Q15 vector multiplication.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicMult
* @{
*/
/**
* @brief Q15 vector multiplication
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
*/
void arm_mult_q15(
q15_t * pSrcA,
q15_t * pSrcB,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counters */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t inA1, inA2, inB1, inB2; /* temporary input variables */
q15_t out1, out2, out3, out4; /* temporary output variables */
q31_t mul1, mul2, mul3, mul4; /* temporary variables */
/* loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* read two samples at a time from sourceA */
inA1 = *__SIMD32(pSrcA)++;
/* read two samples at a time from sourceB */
inB1 = *__SIMD32(pSrcB)++;
/* read two samples at a time from sourceA */
inA2 = *__SIMD32(pSrcA)++;
/* read two samples at a time from sourceB */
inB2 = *__SIMD32(pSrcB)++;
/* multiply mul = sourceA * sourceB */
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
mul2 = (q31_t) ((q15_t) inA1 * (q15_t) inB1);
mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16));
mul4 = (q31_t) ((q15_t) inA2 * (q15_t) inB2);
/* saturate result to 16 bit */
out1 = (q15_t) __SSAT(mul1 >> 15, 16);
out2 = (q15_t) __SSAT(mul2 >> 15, 16);
out3 = (q15_t) __SSAT(mul3 >> 15, 16);
out4 = (q15_t) __SSAT(mul4 >> 15, 16);
/* store the result */
#ifndef ARM_MATH_BIG_ENDIAN
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
#else
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Decrement the blockSize loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A * B */
/* Multiply the inputs and store the result in the destination buffer */
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
/* Decrement the blockSize loop counter */
blkCnt--;
}
}
/**
* @} end of BasicMult group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mult_q31.c
*
* Description: Q31 vector multiplication.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicMult
* @{
*/
/**
* @brief Q31 vector multiplication.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
*/
void arm_mult_q31(
q31_t * pSrcA,
q31_t * pSrcB,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counters */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t inA1, inA2, inA3, inA4; /* temporary input variables */
q31_t inB1, inB2, inB3, inB4; /* temporary input variables */
q31_t out1, out2, out3, out4; /* temporary output variables */
/* loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A * B */
/* Multiply the inputs and then store the results in the destination buffer. */
inA1 = *pSrcA++;
inA2 = *pSrcA++;
inA3 = *pSrcA++;
inA4 = *pSrcA++;
inB1 = *pSrcB++;
inB2 = *pSrcB++;
inB3 = *pSrcB++;
inB4 = *pSrcB++;
out1 = ((q63_t) inA1 * inB1) >> 32;
out2 = ((q63_t) inA2 * inB2) >> 32;
out3 = ((q63_t) inA3 * inB3) >> 32;
out4 = ((q63_t) inA4 * inB4) >> 32;
out1 = __SSAT(out1, 31);
out2 = __SSAT(out2, 31);
out3 = __SSAT(out3, 31);
out4 = __SSAT(out4, 31);
*pDst++ = out1 << 1u;
*pDst++ = out2 << 1u;
*pDst++ = out3 << 1u;
*pDst++ = out4 << 1u;
/* Decrement the blockSize loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A * B */
/* Multiply the inputs and then store the results in the destination buffer. */
inA1 = *pSrcA++;
inB1 = *pSrcB++;
out1 = ((q63_t) inA1 * inB1) >> 32;
out1 = __SSAT(out1, 31);
*pDst++ = out1 << 1u;
/* Decrement the blockSize loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A * B */
/* Multiply the inputs and then store the results in the destination buffer. */
*pDst++ =
(q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
/* Decrement the blockSize loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of BasicMult group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_mult_q7.c
*
* Description: Q7 vector multiplication.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup BasicMult
* @{
*/
/**
* @brief Q7 vector multiplication
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
*/
void arm_mult_q7(
q7_t * pSrcA,
q7_t * pSrcB,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counters */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q7_t out1, out2, out3, out4; /* Temporary variables to store the product */
/* loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A * B */
/* Multiply the inputs and store the results in temporary variables */
out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
/* Store the results of 4 inputs in the destination buffer in single cycle by packing */
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
/* Decrement the blockSize loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A * B */
/* Multiply the inputs and store the result in the destination buffer */
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
/* Decrement the blockSize loop counter */
blkCnt--;
}
}
/**
* @} end of BasicMult group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_negate_f32.c
*
* Description: Negates floating-point vectors.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @defgroup negate Vector Negate
*
* Negates the elements of a vector.
*
* <pre>
* pDst[n] = -pSrc[n], 0 <= n < blockSize.
* </pre>
*
* The functions support in-place computation allowing the source and
* destination pointers to reference the same memory buffer.
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
* @addtogroup negate
* @{
*/
/**
* @brief Negates the elements of a floating-point vector.
* @param[in] *pSrc points to the input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*/
void arm_negate_f32(
float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
float32_t in1, in2, in3, in4; /* temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* read inputs from source */
in1 = *pSrc;
in2 = *(pSrc + 1);
in3 = *(pSrc + 2);
in4 = *(pSrc + 3);
/* negate the input */
in1 = -in1;
in2 = -in2;
in3 = -in3;
in4 = -in4;
/* store the result to destination */
*pDst = in1;
*(pDst + 1) = in2;
*(pDst + 2) = in3;
*(pDst + 3) = in4;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = -A */
/* Negate and then store the results in the destination buffer. */
*pDst++ = -*pSrc++;
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of negate group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_negate_q15.c
*
* Description: Negates Q15 vectors.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup negate
* @{
*/
/**
* @brief Negates the elements of a Q15 vector.
* @param[in] *pSrc points to the input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* \par Conditions for optimum performance
* Input and output buffers should be aligned by 32-bit
*
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
*/
void arm_negate_q15(
q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
q15_t in;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t in1, in2; /* Temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = -A */
/* Read two inputs at a time */
in1 = _SIMD32_OFFSET(pSrc);
in2 = _SIMD32_OFFSET(pSrc + 2);
/* negate two samples at a time */
in1 = __QSUB16(0, in1);
/* negate two samples at a time */
in2 = __QSUB16(0, in2);
/* store the result to destination 2 samples at a time */
_SIMD32_OFFSET(pDst) = in1;
/* store the result to destination 2 samples at a time */
_SIMD32_OFFSET(pDst + 2) = in2;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = -A */
/* Negate and then store the result in the destination buffer. */
in = *pSrc++;
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of negate group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_negate_q31.c
*
* Description: Negates Q31 vectors.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup negate
* @{
*/
/**
* @brief Negates the elements of a Q31 vector.
* @param[in] *pSrc points to the input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
void arm_negate_q31(
q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
q31_t in; /* Temporary variable */
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t in1, in2, in3, in4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = -A */
/* Negate and then store the results in the destination buffer. */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
*pDst++ = __QSUB(0, in1);
*pDst++ = __QSUB(0, in2);
*pDst++ = __QSUB(0, in3);
*pDst++ = __QSUB(0, in4);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = -A */
/* Negate and then store the result in the destination buffer. */
in = *pSrc++;
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of negate group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_negate_q7.c
*
* Description: Negates Q7 vectors.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup negate
* @{
*/
/**
* @brief Negates the elements of a Q7 vector.
* @param[in] *pSrc points to the input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
*/
void arm_negate_q7(
q7_t * pSrc,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
q7_t in;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t input; /* Input values1-4 */
q31_t zero = 0x00000000;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = -A */
/* Read four inputs */
input = *__SIMD32(pSrc)++;
/* Store the Negated results in the destination buffer in a single cycle by packing the results */
*__SIMD32(pDst)++ = __QSUB8(zero, input);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = -A */
/* Negate and then store the results in the destination buffer. */ \
in = *pSrc++;
*pDst++ = (in == (q7_t) 0x80) ? 0x7f : -in;
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of negate group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_offset_f32.c
*
* Description: Floating-point vector offset.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @defgroup offset Vector Offset
*
* Adds a constant offset to each element of a vector.
*
* <pre>
* pDst[n] = pSrc[n] + offset, 0 <= n < blockSize.
* </pre>
*
* The functions support in-place computation allowing the source and
* destination pointers to reference the same memory buffer.
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
* @addtogroup offset
* @{
*/
/**
* @brief Adds a constant offset to a floating-point vector.
* @param[in] *pSrc points to the input vector
* @param[in] offset is the offset to be added
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*/
void arm_offset_f32(
float32_t * pSrc,
float32_t offset,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
float32_t in1, in2, in3, in4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
/* read samples from source */
in1 = *pSrc;
in2 = *(pSrc + 1);
/* add offset to input */
in1 = in1 + offset;
/* read samples from source */
in3 = *(pSrc + 2);
/* add offset to input */
in2 = in2 + offset;
/* read samples from source */
in4 = *(pSrc + 3);
/* add offset to input */
in3 = in3 + offset;
/* store result to destination */
*pDst = in1;
/* add offset to input */
in4 = in4 + offset;
/* store result to destination */
*(pDst + 1) = in2;
/* store result to destination */
*(pDst + 2) = in3;
/* store result to destination */
*(pDst + 3) = in4;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */
*pDst++ = (*pSrc++) + offset;
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of offset group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_offset_q15.c
*
* Description: Q15 vector offset.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup offset
* @{
*/
/**
* @brief Adds a constant offset to a Q15 vector.
* @param[in] *pSrc points to the input vector
* @param[in] offset is the offset to be added
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
void arm_offset_q15(
q15_t * pSrc,
q15_t offset,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t offset_packed; /* Offset packed to 32 bit */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
offset_packed = __PKHBT(offset, offset, 16);
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer, 2 samples at a time. */
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
*pDst++ = (q15_t) __QADD16(*pSrc++, offset);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrc++ + offset), 16);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of offset group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_offset_q31.c
*
* Description: Q31 vector offset.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup offset
* @{
*/
/**
* @brief Adds a constant offset to a Q31 vector.
* @param[in] *pSrc points to the input vector
* @param[in] offset is the offset to be added
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
void arm_offset_q31(
q31_t * pSrc,
q31_t offset,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t in1, in2, in3, in4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
*pDst++ = __QADD(in1, offset);
*pDst++ = __QADD(in2, offset);
*pDst++ = __QADD(in3, offset);
*pDst++ = __QADD(in4, offset);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */
*pDst++ = __QADD(*pSrc++, offset);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of offset group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_offset_q7.c
*
* Description: Q7 vector offset.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup offset
* @{
*/
/**
* @brief Adds a constant offset to a Q7 vector.
* @param[in] *pSrc points to the input vector
* @param[in] offset is the offset to be added
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
void arm_offset_q7(
q7_t * pSrc,
q7_t offset,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t offset_packed; /* Offset packed to 32 bit */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
offset_packed = __PACKq7(offset, offset, offset, offset);
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the results in the destination bufferfor 4 samples at a time. */
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrc)++, offset_packed);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */
*pDst++ = (q7_t) __SSAT((q15_t) * pSrc++ + offset, 8);
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of offset group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_scale_f32.c
*
* Description: Multiplies a floating-point vector by a scalar.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @defgroup scale Vector Scale
*
* Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
*
* <pre>
* pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
* </pre>
*
* In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
* a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
* The shift allows the gain of the scaling operation to exceed 1.0.
* The algorithm used with fixed-point data is:
*
* <pre>
* pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
* </pre>
*
* The overall scale factor applied to the fixed-point data is
* <pre>
* scale = scaleFract * 2^shift.
* </pre>
*
* The functions support in-place computation allowing the source and destination
* pointers to reference the same memory buffer.
*/
/**
* @addtogroup scale
* @{
*/
/**
* @brief Multiplies a floating-point vector by a scalar.
* @param[in] *pSrc points to the input vector
* @param[in] scale scale factor to be applied
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*/
void arm_scale_f32(
float32_t * pSrc,
float32_t scale,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
float32_t in1, in2, in3, in4; /* temporary variabels */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A * scale */
/* Scale the input and then store the results in the destination buffer. */
/* read input samples from source */
in1 = *pSrc;
in2 = *(pSrc + 1);
/* multiply with scaling factor */
in1 = in1 * scale;
/* read input sample from source */
in3 = *(pSrc + 2);
/* multiply with scaling factor */
in2 = in2 * scale;
/* read input sample from source */
in4 = *(pSrc + 3);
/* multiply with scaling factor */
in3 = in3 * scale;
in4 = in4 * scale;
/* store the result to destination */
*pDst = in1;
*(pDst + 1) = in2;
*(pDst + 2) = in3;
*(pDst + 3) = in4;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
*pDst++ = (*pSrc++) * scale;
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of scale group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_scale_q15.c
*
* Description: Multiplies a Q15 vector by a scalar.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup scale
* @{
*/
/**
* @brief Multiplies a Q15 vector by a scalar.
* @param[in] *pSrc points to the input vector
* @param[in] scaleFract fractional portion of the scale value
* @param[in] shift number of bits to shift the result by
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
* These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
*/
void arm_scale_q15(
q15_t * pSrc,
q15_t scaleFract,
int8_t shift,
q15_t * pDst,
uint32_t blockSize)
{
int8_t kShift = 15 - shift; /* shift to apply after scaling */
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q15_t in1, in2, in3, in4;
q31_t inA1, inA2; /* Temporary variables */
q31_t out1, out2, out3, out4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* Reading 2 inputs from memory */
inA1 = *__SIMD32(pSrc)++;
inA2 = *__SIMD32(pSrc)++;
/* C = A * scale */
/* Scale the inputs and then store the 2 results in the destination buffer
* in single cycle by packing the outputs */
out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
out2 = (q31_t) ((q15_t) inA1 * scaleFract);
out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
out4 = (q31_t) ((q15_t) inA2 * scaleFract);
/* apply shifting */
out1 = out1 >> kShift;
out2 = out2 >> kShift;
out3 = out3 >> kShift;
out4 = out4 >> kShift;
/* saturate the output */
in1 = (q15_t) (__SSAT(out1, 16));
in2 = (q15_t) (__SSAT(out2, 16));
in3 = (q15_t) (__SSAT(out3, 16));
in4 = (q15_t) (__SSAT(out4, 16));
/* store the result to destination */
*__SIMD32(pDst)++ = __PKHBT(in2, in1, 16);
*__SIMD32(pDst)++ = __PKHBT(in4, in3, 16);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
*pDst++ = (q15_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 16));
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
*pDst++ = (q15_t) (__SSAT(((q31_t) * pSrc++ * scaleFract) >> kShift, 16));
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of scale group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_scale_q31.c
*
* Description: Multiplies a Q31 vector by a scalar.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup scale
* @{
*/
/**
* @brief Multiplies a Q31 vector by a scalar.
* @param[in] *pSrc points to the input vector
* @param[in] scaleFract fractional portion of the scale value
* @param[in] shift number of bits to shift the result by
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
* These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
*/
void arm_scale_q31(
q31_t * pSrc,
q31_t scaleFract,
int8_t shift,
q31_t * pDst,
uint32_t blockSize)
{
int8_t kShift = shift + 1; /* Shift to apply after scaling */
int8_t sign = (kShift & 0x80);
uint32_t blkCnt; /* loop counter */
q31_t in, out;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q31_t in1, in2, in3, in4; /* temporary input variables */
q31_t out1, out2, out3, out4; /* temporary output variabels */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
if(sign == 0u)
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* read four inputs from source */
in1 = *pSrc;
in2 = *(pSrc + 1);
in3 = *(pSrc + 2);
in4 = *(pSrc + 3);
/* multiply input with scaler value */
in1 = ((q63_t) in1 * scaleFract) >> 32;
in2 = ((q63_t) in2 * scaleFract) >> 32;
in3 = ((q63_t) in3 * scaleFract) >> 32;
in4 = ((q63_t) in4 * scaleFract) >> 32;
/* apply shifting */
out1 = in1 << kShift;
out2 = in2 << kShift;
/* saturate the results. */
if(in1 != (out1 >> kShift))
out1 = 0x7FFFFFFF ^ (in1 >> 31);
if(in2 != (out2 >> kShift))
out2 = 0x7FFFFFFF ^ (in2 >> 31);
out3 = in3 << kShift;
out4 = in4 << kShift;
*pDst = out1;
*(pDst + 1) = out2;
if(in3 != (out3 >> kShift))
out3 = 0x7FFFFFFF ^ (in3 >> 31);
if(in4 != (out4 >> kShift))
out4 = 0x7FFFFFFF ^ (in4 >> 31);
/* Store result destination */
*(pDst + 2) = out3;
*(pDst + 3) = out4;
/* Update pointers to process next sampels */
pSrc += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* read four inputs from source */
in1 = *pSrc;
in2 = *(pSrc + 1);
in3 = *(pSrc + 2);
in4 = *(pSrc + 3);
/* multiply input with scaler value */
in1 = ((q63_t) in1 * scaleFract) >> 32;
in2 = ((q63_t) in2 * scaleFract) >> 32;
in3 = ((q63_t) in3 * scaleFract) >> 32;
in4 = ((q63_t) in4 * scaleFract) >> 32;
/* apply shifting */
out1 = in1 >> -kShift;
out2 = in2 >> -kShift;
out3 = in3 >> -kShift;
out4 = in4 >> -kShift;
/* Store result destination */
*pDst = out1;
*(pDst + 1) = out2;
*(pDst + 2) = out3;
*(pDst + 3) = out4;
/* Update pointers to process next sampels */
pSrc += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
if(sign == 0)
{
while(blkCnt > 0u)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in << kShift;
if(in != (out >> kShift))
out = 0x7FFFFFFF ^ (in >> 31);
*pDst++ = out;
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
while(blkCnt > 0u)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
in = *pSrc++;
in = ((q63_t) in * scaleFract) >> 32;
out = in >> -kShift;
*pDst++ = out;
/* Decrement the loop counter */
blkCnt--;
}
}
}
/**
* @} end of scale group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_scale_q7.c
*
* Description: Multiplies a Q7 vector by a scalar.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup scale
* @{
*/
/**
* @brief Multiplies a Q7 vector by a scalar.
* @param[in] *pSrc points to the input vector
* @param[in] scaleFract fractional portion of the scale value
* @param[in] shift number of bits to shift the result by
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
* These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
*/
void arm_scale_q7(
q7_t * pSrc,
q7_t scaleFract,
int8_t shift,
q7_t * pDst,
uint32_t blockSize)
{
int8_t kShift = 7 - shift; /* shift to apply after scaling */
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q7_t in1, in2, in3, in4, out1, out2, out3, out4; /* Temporary variables to store input & output */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* Reading 4 inputs from memory */
in1 = *pSrc++;
in2 = *pSrc++;
in3 = *pSrc++;
in4 = *pSrc++;
/* C = A * scale */
/* Scale the inputs and then store the results in the temporary variables. */
out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8));
out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
/* Packing the individual outputs into 32bit and storing in
* destination buffer in single write */
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
*pDst++ = (q7_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 8));
/* Decrement the loop counter */
blkCnt--;
}
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
*pDst++ = (q7_t) (__SSAT((((q15_t) * pSrc++ * scaleFract) >> kShift), 8));
/* Decrement the loop counter */
blkCnt--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of scale group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_shift_q15.c
*
* Description: Shifts the elements of a Q15 vector by a specified number of bits.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup shift
* @{
*/
/**
* @brief Shifts the elements of a Q15 vector a specified number of bits.
* @param[in] *pSrc points to the input vector
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
*/
void arm_shift_q15(
q15_t * pSrc,
int8_t shiftBits,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
uint8_t sign; /* Sign of shiftBits */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q15_t in1, in2; /* Temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* Getting the sign of shiftBits */
sign = (shiftBits & 0x80);
/* If the shift value is positive then do right shift else left shift */
if(sign == 0u)
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* Read 2 inputs */
in1 = *pSrc++;
in2 = *pSrc++;
/* C = A << shiftBits */
/* Shift the inputs and then store the results in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
__SSAT((in2 << shiftBits), 16), 16);
#else
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
__SSAT((in1 << shiftBits), 16), 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
in1 = *pSrc++;
in2 = *pSrc++;
#ifndef ARM_MATH_BIG_ENDIAN
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
__SSAT((in2 << shiftBits), 16), 16);
#else
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
__SSAT((in1 << shiftBits), 16), 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A << shiftBits */
/* Shift and then store the results in the destination buffer. */
*pDst++ = __SSAT((*pSrc++ << shiftBits), 16);
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* Read 2 inputs */
in1 = *pSrc++;
in2 = *pSrc++;
/* C = A >> shiftBits */
/* Shift the inputs and then store the results in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
(in2 >> -shiftBits), 16);
#else
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
(in1 >> -shiftBits), 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
in1 = *pSrc++;
in2 = *pSrc++;
#ifndef ARM_MATH_BIG_ENDIAN
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
(in2 >> -shiftBits), 16);
#else
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
(in1 >> -shiftBits), 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A >> shiftBits */
/* Shift the inputs and then store the results in the destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement the loop counter */
blkCnt--;
}
}
#else
/* Run the below code for Cortex-M0 */
/* Getting the sign of shiftBits */
sign = (shiftBits & 0x80);
/* If the shift value is positive then do right shift else left shift */
if(sign == 0u)
{
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A << shiftBits */
/* Shift and then store the results in the destination buffer. */
*pDst++ = __SSAT(((q31_t) * pSrc++ << shiftBits), 16);
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A >> shiftBits */
/* Shift the inputs and then store the results in the destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement the loop counter */
blkCnt--;
}
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of shift group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_shift_q31.c
*
* Description: Shifts the elements of a Q31 vector by a specified number of bits.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @defgroup shift Vector Shift
*
* Shifts the elements of a fixed-point vector by a specified number of bits.
* There are separate functions for Q7, Q15, and Q31 data types.
* The underlying algorithm used is:
*
* <pre>
* pDst[n] = pSrc[n] << shift, 0 <= n < blockSize.
* </pre>
*
* If <code>shift</code> is positive then the elements of the vector are shifted to the left.
* If <code>shift</code> is negative then the elements of the vector are shifted to the right.
*
* The functions support in-place computation allowing the source and destination
* pointers to reference the same memory buffer.
*/
/**
* @addtogroup shift
* @{
*/
/**
* @brief Shifts the elements of a Q31 vector a specified number of bits.
* @param[in] *pSrc points to the input vector
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
*/
void arm_shift_q31(
q31_t * pSrc,
int8_t shiftBits,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
#ifndef ARM_MATH_CM0_FAMILY
q31_t in1, in2, in3, in4; /* Temporary input variables */
q31_t out1, out2, out3, out4; /* Temporary output variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
if(sign == 0u)
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A << shiftBits */
/* Shift the input and then store the results in the destination buffer. */
in1 = *pSrc;
in2 = *(pSrc + 1);
out1 = in1 << shiftBits;
in3 = *(pSrc + 2);
out2 = in2 << shiftBits;
in4 = *(pSrc + 3);
if(in1 != (out1 >> shiftBits))
out1 = 0x7FFFFFFF ^ (in1 >> 31);
if(in2 != (out2 >> shiftBits))
out2 = 0x7FFFFFFF ^ (in2 >> 31);
*pDst = out1;
out3 = in3 << shiftBits;
*(pDst + 1) = out2;
out4 = in4 << shiftBits;
if(in3 != (out3 >> shiftBits))
out3 = 0x7FFFFFFF ^ (in3 >> 31);
if(in4 != (out4 >> shiftBits))
out4 = 0x7FFFFFFF ^ (in4 >> 31);
*(pDst + 2) = out3;
*(pDst + 3) = out4;
/* Update destination pointer to process next sampels */
pSrc += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A >> shiftBits */
/* Shift the input and then store the results in the destination buffer. */
in1 = *pSrc;
in2 = *(pSrc + 1);
in3 = *(pSrc + 2);
in4 = *(pSrc + 3);
*pDst = (in1 >> -shiftBits);
*(pDst + 1) = (in2 >> -shiftBits);
*(pDst + 2) = (in3 >> -shiftBits);
*(pDst + 3) = (in4 >> -shiftBits);
pSrc += 4u;
pDst += 4u;
blkCnt--;
}
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A (>> or <<) shiftBits */
/* Shift the input and then store the result in the destination buffer. */
*pDst++ = (sign == 0u) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
(*pSrc++ >> -shiftBits);
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of shift group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_shift_q7.c
*
* Description: Processing function for the Q7 Shifting
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @addtogroup shift
* @{
*/
/**
* @brief Shifts the elements of a Q7 vector a specified number of bits.
* @param[in] *pSrc points to the input vector
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in the vector
* @return none.
*
* \par Conditions for optimum performance
* Input and output buffers should be aligned by 32-bit
*
*
* <b>Scaling and Overflow Behavior:</b>
* \par
* The function uses saturating arithmetic.
* Results outside of the allowable Q7 range [0x8 0x7F] will be saturated.
*/
void arm_shift_q7(
q7_t * pSrc,
int8_t shiftBits,
q7_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
uint8_t sign; /* Sign of shiftBits */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
q7_t in1; /* Input value1 */
q7_t in2; /* Input value2 */
q7_t in3; /* Input value3 */
q7_t in4; /* Input value4 */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* Getting the sign of shiftBits */
sign = (shiftBits & 0x80);
/* If the shift value is positive then do right shift else left shift */
if(sign == 0u)
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A << shiftBits */
/* Read 4 inputs */
in1 = *pSrc;
in2 = *(pSrc + 1);
in3 = *(pSrc + 2);
in4 = *(pSrc + 3);
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
*__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
__SSAT((in2 << shiftBits), 8),
__SSAT((in3 << shiftBits), 8),
__SSAT((in4 << shiftBits), 8));
/* Update source pointer to process next sampels */
pSrc += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A << shiftBits */
/* Shift the input and then store the result in the destination buffer. */
*pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
shiftBits = -shiftBits;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A >> shiftBits */
/* Read 4 inputs */
in1 = *pSrc;
in2 = *(pSrc + 1);
in3 = *(pSrc + 2);
in4 = *(pSrc + 3);
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
*__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
(in3 >> shiftBits), (in4 >> shiftBits));
pSrc += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
while(blkCnt > 0u)
{
/* C = A >> shiftBits */
/* Shift the input and then store the result in the destination buffer. */
in1 = *pSrc++;
*pDst++ = (in1 >> shiftBits);
/* Decrement the loop counter */
blkCnt--;
}
}
#else
/* Run the below code for Cortex-M0 */
/* Getting the sign of shiftBits */
sign = (shiftBits & 0x80);
/* If the shift value is positive then do right shift else left shift */
if(sign == 0u)
{
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A << shiftBits */
/* Shift the input and then store the result in the destination buffer. */
*pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
/* Decrement the loop counter */
blkCnt--;
}
}
else
{
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while(blkCnt > 0u)
{
/* C = A >> shiftBits */
/* Shift the input and then store the result in the destination buffer. */
*pDst++ = (*pSrc++ >> -shiftBits);
/* Decrement the loop counter */
blkCnt--;
}
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of shift group
*/
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_sub_f32.c
*
* Description: Floating-point vector subtraction.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ---------------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupMath
*/
/**
* @defgroup BasicSub Vector Subtraction
*
* Element-by-element subtraction of two vectors.
*
* <pre>
* pDst[n] = pSrcA[n] - pSrcB[n], 0 <= n < blockSize.
* </pre>
*
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
*/
/**
* @addtogroup BasicSub
* @{
*/
/**
* @brief Floating-point vector subtraction.
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[out] *pDst points to the output vector
* @param[in] blockSize number of samples in each vector
* @return none.
*/
void arm_sub_f32(
float32_t * pSrcA,
float32_t * pSrcB,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counter */
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
float32_t inA1, inA2, inA3, inA4; /* temporary variables */
float32_t inB1, inB2, inB3, inB4; /* temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while(blkCnt > 0u)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer. */
/* Read 4 input samples from sourceA and sourceB */
inA1 = *pSrcA;
inB1 = *pSrcB;
inA2 = *(pSrcA + 1);
inB2 = *(pSrcB + 1);
inA3 = *(pSrcA + 2);
inB3 = *(pSrcB + 2);
inA4 = *(pSrcA + 3);
inB4 = *(pSrcB + 3);
/* dst = srcA - srcB */
/* subtract and store the result */
*pDst = inA1 - inB1;
*(pDst + 1) = inA2 - inB2;
*(pDst + 2) = inA3 - inB3;
*(pDst + 3) = inA4 - inB4;
/* Update pointers to process next sampels */
pSrcA += 4u;
pSrcB += 4u;
pDst += 4u;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
#else
/* Run the below code for Cortex-M0 */
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
while(blkCnt > 0u)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer. */
*pDst++ = (*pSrcA++) - (*pSrcB++);
/* Decrement the loop counter */
blkCnt--;
}
}
/**
* @} end of BasicSub group
*/
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment