zynq hls定点数计算

Julie ·
更新时间:2024-09-20
· 960 次阅读

在神经网络的fpga加速中,定点运算不仅比浮点运算更节省资源,还具有更快的速度,而且因为定点运算造成的神经网络的精度损失亦可忽略不计。本节介绍如何使用HLS进行定点运算以及如何与zynq cpu交互。

HLS中提供了头文件ap_fixed.h,极大地方便了我们使用定点数,具体用法此处从略。

HLS代码示例 #include #include typedef ap_fixed data_t; data_t fixed_test(volatile data_t *src1,volatile data_t *src2,volatile data_t *dest){ #pragma HLS INTERFACE m_axi depth=100 port=src1 offset=slave bundle=MASTER_BUS1 #pragma HLS INTERFACE m_axi depth=100 port=src2 offset=slave bundle=MASTER_BUS2 #pragma HLS INTERFACE m_axi depth=100 port=dest offset=slave bundle=MASTER_BUS3 #pragma HLS INTERFACE s_axilite port=return bundle=CRTL_BUS data_t buff1[100]; data_t buff2[100]; data_t buff3[100]; memcpy(buff1,(const data_t*)src1,10*sizeof(data_t)); memcpy(buff2,(const data_t*)src2,10*sizeof(data_t)); int i; for(i=0;i<100;i++){ buff3[i]=buff1[i]*buff2[i]; } memcpy((data_t*)dest,(const data_t*)buff3,10*sizeof(data_t)); return buff1[0]*buff2[0]; }

上述代码中,data_t被定义为ap_fixed<16,8,AP_RND,AP_SAT>,即16位有符号定点数,其中整数部分(含符号位)占8位、小数部分占8位,AP_RND和AP_SAT分别表示舍入方式和溢出(饱和)方式的设置。程序实现了读取两个16位定点数数组,逐元素求积并将结果写入另一个数组的简单功能。

PS端代码

PS端没有16位定点数类型,因此我们使用short类型来表示16位定点数。设该定点数的值为a,小数部分占8位,则short b=(short)(a*256)(即a乘以2的8次方;注意C语言中^是按位异或运算符,不能直接写成2^8)的二进制表示即为该定点数的二进制表示。PL计算结束后将结果存入dest数组中,则float c=dest[i]/256.0的值就是最终结果。

/****************************************************************************** * * Copyright (C) 2009 - 2014 Xilinx, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * Use of the Software is limited solely to applications: * (a) running on a Xilinx device, or * (b) that interact with a Xilinx device through a bus or interconnect. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * XILINX BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Except as contained in this notice, the name of the Xilinx shall not be used * in advertising or otherwise to promote the sale, use or other dealings in * this Software without prior written authorization from Xilinx. * ******************************************************************************/ /* * helloworld.c: simple test application * * This application configures UART 16550 to baud rate 9600. 
* PS7 UART (Zynq) is not initialized by this application, since * bootrom/bsp configures it to baud rate 115200 * * ------------------------------------------------ * | UART TYPE BAUD RATE | * ------------------------------------------------ * uartns550 9600 * uartlite Configurable only in HW design * ps7_uart 115200 (configured by bootrom/bsp) */ #include #include #include "platform.h" #include "xil_printf.h" #include "xfixed_test_hw.h" #include "xfixed_test.h" #include "xil_cache.h" int main() { Xil_DCacheDisable(); int i; short *src1=(short*)malloc(10*sizeof(short)); short *src2=(short*)malloc(10*sizeof(short)); short *dest=(short*)malloc(10*sizeof(short)); for(i=0;i<10;i++){ src1[i]=(short)((rand()%100-50)/10.0*256); src2[i]=(short)((rand()%100-50)/10.0*256); } for(i=0;i<5;i++) printf("%f,",(float)src1[i]/256); printf("\n"); for(i=0;i<5;i++) printf("%f,",(float)src2[i]/256); printf("\n"); XFixed_test HlsXFixed_test; XFixed_test_Config *ExamplePtr; printf("Look Up the device configuration.\n"); ExamplePtr = XFixed_test_LookupConfig(XPAR_FIXED_TEST_0_DEVICE_ID); if (!ExamplePtr) { printf("ERROR: Lookup of accelerator configuration failed.\n\r"); return XST_FAILURE; } printf("Initialize the Device\n"); long status = XFixed_test_CfgInitialize(&HlsXFixed_test, ExamplePtr); if (status != XST_SUCCESS) { printf("ERROR: Could not initialize accelerator.\n\r"); return(-1); } XFixed_test_Set_src1_V(&HlsXFixed_test,(u32)src1); XFixed_test_Set_src2_V(&HlsXFixed_test,(u32)src2); XFixed_test_Set_dest_V(&HlsXFixed_test,(u32)dest); XFixed_test_Start(&HlsXFixed_test); while (XFixed_test_IsDone(&HlsXFixed_test) == 0); xil_printf("***********************************\n"); for(i=0;i<5;i++){ printf("%f,",(float)dest[i]/256); fflush(stdout); } return 0; }
作者:qq_40268672



zynq hls

需要 登录 后方可回复, 如果你还没有账号请 注册新账号