//test define start #define _GNU_SOURCE #include #include #include //#include #include #include #include #include "typedef.h" #include "ospHeap.h" #include "ucp_printf.h" #include "arm_csu.h" #include "memcpy_csu.h" #include "ospTypes.h" extern int32_t g_dev_mem_fd; static uint64_t get_cycle() { uint64_t cycle; asm volatile("mrs %0, pmccntr_el0" : "=r" (cycle)); return cycle; } // mode=0, memcpy // mode=1, csu int32_t test_arm_csu_simple(uint64_t addrVirSrc, uint64_t addrVirDst, int dataLen) { struct timeval begin,end; UCP_PRINT_DEBUG("init src data. \r\n"); for (int i = 0; i < (dataLen>>2); i++) { *((uint32_t*)addrVirSrc+i) = i; } UCP_PRINT_DEBUG("init dst data. \r\n"); memset((void*)addrVirDst, 0, dataLen); UCP_PRINT_DEBUG("src addr virt to phy. \r\n"); uint64_t addrSrc = 0; osp_virt_to_phy(APE_PHY, addrVirSrc, &addrSrc); UCP_PRINT_DEBUG("src phy addr = 0x%lx .\r\n", addrSrc); UCP_PRINT_DEBUG("dst addr virt to phy. \r\n"); uint64_t addrDst = 0; osp_virt_to_phy(APE_PHY, addrVirDst, &addrDst); UCP_PRINT_DEBUG("dst phy addr = 0x%lx .\r\n", addrDst); UCP_PRINT_DEBUG("start csu transfer. \r\n"); gettimeofday(&begin, NULL); int tag = arm_csu_dma_1D_transfer((uint64_t)addrSrc, (uint64_t)addrDst, dataLen); arm_csu_wait_done(tag); #if 0 while (0 == (status&(1<>2); i++) { *((uint32_t*)addrVirSrc+i) = i; } UCP_PRINT_DEBUG("init dst data. \r\n"); memset((void*)addrVirDst, 0, dataLen*cycle); UCP_PRINT_DEBUG("src addr virt to phy. \r\n"); uint64_t addrSrc = 0; osp_virt_to_phy(APE_PHY, addrVirSrc, &addrSrc); UCP_PRINT_DEBUG("src phy addr = 0x%lx .\r\n", addrSrc); UCP_PRINT_DEBUG("dst addr virt to phy. \r\n"); uint64_t addrDst = 0; osp_virt_to_phy(APE_PHY, addrVirDst, &addrDst); UCP_PRINT_DEBUG("dst phy addr = 0x%lx .\r\n", addrDst); if (0 == mode) { UCP_PRINT_DEBUG("memcpy cost test start ..... \r\n"); gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { memcpy((void*)inAddrDst, (void*)inAddrSrc, dataLen); inAddrSrc += dataLen; inAddrDst += dataLen; } gettimeofday(&end, NULL); UCP_PRINT_DEBUG("100 of nocache to nocache memcpy cost: %ld. \r\n", (end.tv_usec - begin.tv_usec)); *((uint32_t*)addrVirSrc+0xF0004) = (end.tv_usec - begin.tv_usec); } else { UCP_PRINT_DEBUG("arm csu cost test start ..... \r\n"); inAddrSrc = addrSrc; inAddrDst = addrDst; int tag = 0; gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { tag = arm_csu_dma_1D_transfer(inAddrSrc, inAddrDst, dataLen); inAddrSrc += dataLen; inAddrDst += dataLen; } arm_csu_wait_done(tag); gettimeofday(&end, NULL); UCP_PRINT_DEBUG("last tag = %d. \r\n", tag); UCP_PRINT_DEBUG("100 of nocache to nocache arm csu cost: %ld. \r\n", (end.tv_usec - begin.tv_usec)); *((uint32_t*)addrVirSrc+0xF0008) = (end.tv_usec - begin.tv_usec); } return 0; } int32_t test_cache_to_nocache(uint64_t addrVirSrc, uint64_t addrVirDst, int dataLen, int cycle, int mode) { uint64_t inAddrSrc = addrVirSrc; uint64_t inAddrDst = addrVirDst; struct timeval begin,end; UCP_PRINT_DEBUG("init src data. \r\n"); for (int i = 0; i < ((dataLen*cycle)>>2); i++) { *((uint32_t*)addrVirSrc+i) = i; } UCP_PRINT_DEBUG("init dst data. \r\n"); memset((void*)addrVirDst, 0, dataLen*cycle); UCP_PRINT_DEBUG("src addr virt to phy. \r\n"); uint64_t addrSrc = 0; osp_virt_to_phy(ARM_STACK, addrVirSrc, &addrSrc); UCP_PRINT_DEBUG("src phy addr = 0x%lx .\r\n", addrSrc); UCP_PRINT_DEBUG("dst addr virt to phy. \r\n"); uint64_t addrDst = 0; osp_virt_to_phy(APE_TEXT, addrVirDst, &addrDst); UCP_PRINT_DEBUG("dst phy addr = 0x%lx .\r\n", addrDst); if (0 == mode) { UCP_PRINT_DEBUG("memcpy cost test start ..... \r\n"); gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { memcpy((void*)inAddrDst, (void*)inAddrSrc, dataLen); inAddrSrc += dataLen; inAddrDst += dataLen; } gettimeofday(&end, NULL); UCP_PRINT_DEBUG("100 of cache to noncache memcpy cost: %ld. \r\n", (end.tv_usec - begin.tv_usec)); *((uint32_t*)addrVirSrc+0xF0010) = (end.tv_usec - begin.tv_usec); } else { int tag = 0; UCP_PRINT_DEBUG("arm csu cost test start ..... \r\n"); inAddrSrc = addrSrc; inAddrDst = addrDst; gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { // osp_clean_dcache_area((void*)inAddrSrc, dataLen); osp_clean_dcache_area((void*)(addrVirSrc+i*dataLen), dataLen); tag = arm_csu_dma_1D_transfer(inAddrSrc, inAddrDst, dataLen); inAddrSrc += dataLen; inAddrDst += dataLen; } arm_csu_wait_all_done(); gettimeofday(&end, NULL); UCP_PRINT_DEBUG("last tag = %d. \r\n", tag); UCP_PRINT_DEBUG("100 of cache to noncache arm csu cost: %ld. \r\n", (end.tv_usec - begin.tv_usec)); *((uint32_t*)addrVirSrc+0xF0014) = (end.tv_usec - begin.tv_usec); } return 0; } int32_t test_nocache_to_cache(uint64_t addrVirSrc, uint64_t addrVirDst, int dataLen, int cycle, int mode) { uint64_t inAddrSrc = addrVirSrc; uint64_t inAddrDst = addrVirDst; struct timeval begin,end; UCP_PRINT_DEBUG("init src data. \r\n"); for (int i = 0; i < ((dataLen*cycle)>>2); i++) { *((uint32_t*)addrVirSrc+i) = i; } UCP_PRINT_DEBUG("init dst data. \r\n"); memset((void*)addrVirDst, 0, dataLen*cycle); UCP_PRINT_DEBUG("src addr virt to phy. \r\n"); uint64_t addrSrc = 0; osp_virt_to_phy(APE_TEXT, addrVirSrc, &addrSrc); UCP_PRINT_DEBUG("src phy addr = 0x%lx .\r\n", addrSrc); UCP_PRINT_DEBUG("dst addr virt to phy. \r\n"); uint64_t addrDst = 0; osp_virt_to_phy(ARM_STACK, addrVirDst, &addrDst); UCP_PRINT_DEBUG("dst phy addr = 0x%lx .\r\n", addrDst); if (0 == mode) { UCP_PRINT_DEBUG("memcpy cost test start ..... \r\n"); gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { memcpy((void*)inAddrDst, (void*)inAddrSrc, dataLen); inAddrSrc += dataLen; inAddrDst += dataLen; } gettimeofday(&end, NULL); UCP_PRINT_DEBUG("100 of nocache to cache memcpy cost: %ld. \r\n", (end.tv_usec - begin.tv_usec)); *((uint32_t*)addrVirSrc+0xF0020) = (end.tv_usec - begin.tv_usec); } else { int tag = 0; int lenAlign = dataLen; if (dataLen & (~0x3F)) { lenAlign = (((dataLen>>6)+1)<<6); } UCP_PRINT_DEBUG("arm csu cost test start ..... \r\n"); UCP_PRINT_DEBUG("lenAlign = %d. \r\n", lenAlign); inAddrSrc = addrSrc; inAddrDst = addrDst; gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { osp_flush_dcache_area((void*)(addrVirDst+i*lenAlign), dataLen); tag = arm_csu_dma_1D_transfer(inAddrSrc, inAddrDst, dataLen); // osp_invalid_dcache_area(ARM_STACK, (addrVirDst+i*lenAlign), lenAlign); inAddrSrc += dataLen; inAddrDst += lenAlign; } arm_csu_wait_all_done(); gettimeofday(&end, NULL); UCP_PRINT_DEBUG("last tag = %d. \r\n", tag); UCP_PRINT_DEBUG("100 of nocache to cache arm csu cost: %ld. \r\n", (end.tv_usec - begin.tv_usec)); *((uint32_t*)addrVirSrc+0xF0024) = (end.tv_usec - begin.tv_usec); } return 0; } int32_t test_cache_to_cache(uint64_t addrVirSrc, uint64_t addrVirDst, int dataLen, int cycle, int mode) { uint64_t inAddrSrc = addrVirSrc; uint64_t inAddrDst = addrVirDst; struct timeval begin,end; UCP_PRINT_DEBUG("init src data. \r\n"); for (int i = 0; i < ((dataLen*cycle)>>2); i++) { *((uint32_t*)addrVirSrc+i) = i; } UCP_PRINT_DEBUG("init dst data. \r\n"); memset((void*)addrVirDst, 0, dataLen*cycle); UCP_PRINT_DEBUG("src addr virt to phy. \r\n"); uint64_t addrSrc = 0; osp_virt_to_phy(ARM_STACK, addrVirSrc, &addrSrc); UCP_PRINT_DEBUG("src phy addr = 0x%lx .\r\n", addrSrc); UCP_PRINT_DEBUG("dst addr virt to phy. \r\n"); uint64_t addrDst = 0; osp_virt_to_phy(ARM_STACK, addrVirDst, &addrDst); UCP_PRINT_DEBUG("dst phy addr = 0x%lx .\r\n", addrDst); if (0 == mode) { UCP_PRINT_DEBUG("memcpy cost test start ..... \r\n"); gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { memcpy((void*)inAddrDst, (void*)inAddrSrc, dataLen); inAddrSrc += dataLen; inAddrDst += dataLen; } gettimeofday(&end, NULL); UCP_PRINT_DEBUG("100 of cache to cache memcpy cost: %ld. \r\n", (end.tv_usec - begin.tv_usec)); *((uint32_t*)addrVirSrc+0xF0030) = (end.tv_usec - begin.tv_usec); } else { int tag = 0; int lenAlign = dataLen; if (dataLen & (~0x3F)) { lenAlign = (((dataLen>>6)+1)<<6); } UCP_PRINT_DEBUG("arm csu cost test start ..... \r\n"); UCP_PRINT_DEBUG("lenAlign = %d. \r\n", lenAlign); inAddrSrc = addrSrc; inAddrDst = addrDst; gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { osp_clean_dcache_area((void*)(addrVirSrc+i*dataLen), dataLen); osp_flush_dcache_area((void*)(addrVirDst+i*lenAlign), dataLen); tag = arm_csu_dma_1D_transfer(inAddrSrc, inAddrDst, dataLen); //osp_invalid_dcache_area(ARM_STACK, (addrVirDst+i*lenAlign), lenAlign); inAddrSrc += dataLen; inAddrDst += lenAlign; } arm_csu_wait_all_done(); gettimeofday(&end, NULL); UCP_PRINT_DEBUG("last tag = %d. \r\n", tag); UCP_PRINT_DEBUG("100 of cache to cache arm csu cost: %ld. \r\n", (end.tv_usec - begin.tv_usec)); *((uint32_t*)addrVirSrc+0xF0034) = (end.tv_usec - begin.tv_usec); } return 0; } void test_arm_csu(void) { uint64_t stack_len = 0; void *stack_static_ptr; void *ape_phy_static_ptr; void *ape_txt_static_ptr; stack_static_ptr = get_static_mem(ARM_STACK, &stack_len); // cache, 4g if((void *)OSP_ERROR == stack_static_ptr) { UCP_PRINT_ERROR("get_static_mem-ARM_STACK:error\n"); } else { UCP_PRINT_DEBUG("get_static_mem-ARM_STACK:ok! stack_static_ptr = %lx\n",(uint64_t)stack_static_ptr); } ape_phy_static_ptr = get_static_mem(APE_PHY, &stack_len); // noncache, 2g if((void *)OSP_ERROR == ape_phy_static_ptr) { UCP_PRINT_ERROR("get_static_mem-APE_PHY:error\n"); } else { UCP_PRINT_DEBUG("get_static_mem-APE_PHY:ok! ape_phy_static_ptr = %lx\n",(uint64_t)ape_phy_static_ptr); } ape_txt_static_ptr = get_static_mem(APE_TEXT, &stack_len); // noncache, 2g if((void *)OSP_ERROR == ape_txt_static_ptr) { UCP_PRINT_ERROR("get_static_mem-APE_TEXT:error\n"); } else { UCP_PRINT_DEBUG("get_static_mem-APE_TEXT:ok! ape_txt_static_ptr = %lx\n",(uint64_t)ape_txt_static_ptr); } int dataLen = 0; uint64_t addrVirSrc = 0; uint64_t addrVirDst = 0; UCP_PRINT_DEBUG("arm csu simple test start ...... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)ape_phy_static_ptr; addrVirDst = (uint64_t)(ape_phy_static_ptr+0x80000); test_arm_csu_simple(addrVirSrc, addrVirDst, dataLen); UCP_PRINT_DEBUG("nocache to noncache memcpy cost test start ..... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)(ape_phy_static_ptr + dataLen); addrVirDst = (uint64_t)(ape_phy_static_ptr+0x80000+dataLen); test_nocache_to_nocache(addrVirSrc, addrVirDst, dataLen, 100, 0); UCP_PRINT_DEBUG("nocache to noncache arm csu cost test start ..... \r\n"); dataLen = 1400; addrVirSrc += (dataLen*100); addrVirDst += (dataLen*100); test_nocache_to_nocache(addrVirSrc, addrVirDst, dataLen, 100, 1); UCP_PRINT_DEBUG("cache to noncache memcpy cost test start ..... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)stack_static_ptr; addrVirDst = (uint64_t)ape_txt_static_ptr; test_cache_to_nocache(addrVirSrc, addrVirDst, dataLen, 100, 0); UCP_PRINT_DEBUG("cache to noncache arm csu cost test start ..... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)(stack_static_ptr+0x40000); addrVirDst = (uint64_t)(ape_txt_static_ptr+0x40000); test_cache_to_nocache(addrVirSrc, addrVirDst, dataLen, 100, 1); UCP_PRINT_DEBUG("nocache to cache memcpy cost test start ..... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)(ape_txt_static_ptr+0x80000); addrVirDst = (uint64_t)(stack_static_ptr+0x80000); test_nocache_to_cache(addrVirSrc, addrVirDst, dataLen, 100, 0); UCP_PRINT_DEBUG("nocache to cache arm csu cost test start ..... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)(ape_txt_static_ptr+0xC0000); addrVirDst = (uint64_t)(stack_static_ptr+0xC0000); test_nocache_to_cache(addrVirSrc, addrVirDst, dataLen, 100, 1); UCP_PRINT_DEBUG("cache to cache memcpy cost test start ..... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)(stack_static_ptr+0x100000); addrVirDst = (uint64_t)(stack_static_ptr+0x180000); test_cache_to_cache(addrVirSrc, addrVirDst, dataLen, 100, 0); UCP_PRINT_DEBUG("cache to cache arm csu cost test start ..... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)(stack_static_ptr+0x140000); addrVirDst = (uint64_t)(stack_static_ptr+0x1C0000); test_cache_to_cache(addrVirSrc, addrVirDst, dataLen, 100, 1); } int32_t test_memcpy_csu_stack_to_msg(uint64_t addrVirSrc, uint64_t addrVirDst, int dataLen, int cycle, int mode) { uint64_t inAddrSrc = addrVirSrc; uint64_t inAddrDst = addrVirDst; struct timeval begin,end; UCP_PRINT_DEBUG("init src data. \r\n"); for (int i = 0; i < ((dataLen*cycle)>>2); i++) { *((uint32_t*)addrVirSrc+i) = i; } UCP_PRINT_DEBUG("init dst data. \r\n"); memset((void*)addrVirDst, 0, dataLen*cycle); if (0 == mode) { UCP_PRINT_DEBUG("memcpy cost test start ..... \r\n"); gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { memcpy((void*)inAddrDst, (void*)inAddrSrc, dataLen); inAddrSrc += dataLen; inAddrDst += dataLen; } gettimeofday(&end, NULL); UCP_PRINT_DEBUG("%d of stack to msg memcpy cost: %ld. \r\n", cycle, (end.tv_usec - begin.tv_usec)); } else { UCP_PRINT_DEBUG("arm csu cost test start ..... \r\n"); gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { memcpy_csu(inAddrDst, inAddrSrc, dataLen, STACK2MSG, 0); inAddrSrc += dataLen; inAddrDst += dataLen; } arm_csu_wait_all_done(); gettimeofday(&end, NULL); UCP_PRINT_DEBUG("%d of stack to msg arm csu cost: %ld. \r\n", cycle, (end.tv_usec - begin.tv_usec)); } return 0; } int32_t test_memcpy_csu_msg_to_stack(uint64_t addrVirSrc, uint64_t addrVirDst, int dataLen, int cycle, int mode) { uint64_t inAddrSrc = addrVirSrc; uint64_t inAddrDst = addrVirDst; struct timeval begin,end; UCP_PRINT_DEBUG("init src data. \r\n"); for (int i = 0; i < ((dataLen*cycle)>>2); i++) { *((uint32_t*)addrVirSrc+i) = i; } UCP_PRINT_DEBUG("init dst data. \r\n"); memset((void*)addrVirDst, 0, dataLen*cycle); if (0 == mode) { UCP_PRINT_DEBUG("memcpy cost test start ..... \r\n"); gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { memcpy((void*)inAddrDst, (void*)inAddrSrc, dataLen); osp_clean_dcache_area((void *)inAddrDst, dataLen); inAddrSrc += dataLen; inAddrDst += dataLen; } gettimeofday(&end, NULL); UCP_PRINT_DEBUG("%d of msg to stack memcpy cost: %ld. \r\n", cycle, (end.tv_usec - begin.tv_usec)); } else { UCP_PRINT_DEBUG("arm csu cost test start ..... \r\n"); gettimeofday(&begin, NULL); for (int i = 0; i < cycle; i++) { memcpy_csu(inAddrDst, inAddrSrc, dataLen, MSG2STACK, 1); inAddrSrc += dataLen; inAddrDst += dataLen; } // arm_csu_wait_all_done(); gettimeofday(&end, NULL); UCP_PRINT_DEBUG("%d of msg to stack arm csu cost: %ld. \r\n", cycle, (end.tv_usec - begin.tv_usec)); } return 0; } void test_memcpy_csu(void) { uint64_t stack_len = 0; uint64_t msg_len = 0; void *stack_static_ptr = NULL; void *msg_static_ptr = NULL; stack_static_ptr = get_static_mem(ARM_STACK, &stack_len); // cache, 4g if((void *)OSP_ERROR == stack_static_ptr) { UCP_PRINT_ERROR("get_static_mem-ARM_STACK:error\n"); } else { UCP_PRINT_DEBUG("get_static_mem-ARM_STACK:ok! stack_static_ptr = %lx, len = %lx.\n",(uint64_t)stack_static_ptr, stack_len); } msg_static_ptr = get_static_mem(ARM_APE_MSG, &msg_len); // noncache, 128M if((void *)OSP_ERROR == msg_static_ptr) { UCP_PRINT_ERROR("get_static_mem-ARM_APE_MSG:error\n"); } else { UCP_PRINT_DEBUG("get_static_mem-ARM_APE_MSG:ok! msg_static_ptr = %lx, len = %lx.\n",(uint64_t)msg_static_ptr, msg_len); } int dataLen = 0; uint64_t addrVirSrc = 0; uint64_t addrVirDst = 0; UCP_PRINT_DEBUG("stack to msg memcpy cost test start ...... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)(stack_static_ptr); addrVirDst = (uint64_t)(msg_static_ptr); test_memcpy_csu_stack_to_msg(addrVirSrc, addrVirDst, dataLen, 100, 0); UCP_PRINT_DEBUG("stack to msg arm csu cost test start ..... \r\n"); dataLen = 1400; addrVirSrc = (uint64_t)(stack_static_ptr+0x40000); addrVirDst = (uint64_t)(msg_static_ptr+0x40000); test_memcpy_csu_stack_to_msg(addrVirSrc, addrVirDst, dataLen, 100, 1); UCP_PRINT_DEBUG("msg to stack memcpy cost test start ...... \r\n"); dataLen = 140000; addrVirSrc = (uint64_t)(msg_static_ptr+0x80000); addrVirDst = (uint64_t)(stack_static_ptr+0x80000); test_memcpy_csu_msg_to_stack(addrVirSrc, addrVirDst, dataLen, 1, 0); UCP_PRINT_DEBUG("msg to stack arm csu cost test start ..... \r\n"); dataLen = 140000; addrVirSrc = (uint64_t)(msg_static_ptr+0xC0000); addrVirDst = (uint64_t)(stack_static_ptr+0xC0000); test_memcpy_csu_msg_to_stack(addrVirSrc, addrVirDst, dataLen, 1, 1); } int32_t test_case(uint32_t argc, int32_t* argvp) { UCP_PRINT_DEBUG("start running testcase 2."); UCP_PRINT_DEBUG("start arm csu init. \r\n"); // arm_csu_init(); test_memcpy_csu(); return 0; }