TI中文支持网
TI专业的中文技术问题搜集分享网站

AM335x裸机开发: 读DDR为什么比写慢很多?

AM335x裸机开发: 读DDR为什么比写慢很多?

 开发硬件: AM3352, SPI0 接 SPI Flash, DDR3 @ 400MHz / 800MT/s;

开发软件: CCS; 参考AM335X_StarterWare,建立CCS裸机工程;SPL工程加载APP工程到DDR里运行;

已经使能MMU,打开CACHE(包含L2开启)

测试结果: 读DDR比写DDR要慢很多;

测试流程:  16M字节DDR,连续测试10次; 分正序(cache命中高)和倒序(cache命中低)测试;

测试结果:  发现读DDR比写DDR要慢很多?

同样在E2E上搜到类似的问题: https://e2echina.ti.com/question_answer/dsp_arm/sitara_arm/f/25/p/83049/209518 ,但该帖没有结论。麻烦TI FAE帮忙分析或测试一下;

 

32bit Write Test:

32bit 正序Write 160 MB,use 126 Ms; Spd = 1269 MPS. SUM=0xFEC00000

32bit 倒序Write 160 MB,use 164 Ms; Spd = 975 MPS. SUM=0x1400000

 

32bit read Test:

32bit 正序read 160 MB,use 231 Ms; Spd = 692 MPS. SUM=0xFEC00000

32bit 倒序read 160 MB,use 933 Ms; Spd = 171 MPS. SUM=0x1400000

 

8bit Write Test:

8bit 倒序Write 160 MB,use 785 Ms; Spd = 203 MPS. SUM=0x5000000

8bit 正序Write 160 MB,use 811 Ms; Spd = 197 MPS. SUM=0xFB000000

 

8bit Read Test:

8bit 倒序Read 160 MB,use 1792 Ms; Spd = 89 MPS. SUM=0xFB000000

8bit 正序Read 160 MB,use 1089 Ms; Spd = 146 MPS. SUM=0xFB000000

谢谢!

测试代码:

/*
 * Throughput in MB per second for tMB megabytes moved in tUseMs milliseconds.
 * A 0 ms interval is counted as 1 ms so the division is never by zero
 * (a short run can finish inside a single timer tick).
 */
static unsigned int Ddr3SpdMBps(unsigned int tMB, unsigned int tUseMs)
{
    if (tUseMs == 0u)
    {
        tUseMs = 1u;
    }
    return (tMB * 1000u) / tUseMs;
}

/*
 * DDR access-speed test.
 *
 * Runs eight timed sections over the region [iStartAddr, iStartAddr + iSizeBytes):
 * 32-bit write (ascending, descending), 32-bit read (ascending, descending),
 * 8-bit write (descending, ascending), 8-bit read (descending, ascending).
 * Each section sweeps the whole region 10 times and prints the amount moved,
 * the elapsed milliseconds, the throughput and a checksum through UartPrintf.
 *
 * iStartAddr  - start address of the region, used directly as a pointer.
 * iSizeBytes  - region size in bytes; the 32-bit sections cover iSizeBytes/4 words.
 *
 * Timing comes from gGlobalT12, which according to the original comments is a
 * millisecond counter updated in the 1 ms timer interrupt.
 * NOTE(review): gGlobalT12 should be declared volatile so that the two reads
 * really bracket each loop - confirm at its definition.
 *
 * tMB counts bytes in an unsigned int while a section runs, so 10 * iSizeBytes
 * must not exceed UINT_MAX or the reported size wraps.
 *
 * The descending loops use index i-1 so they touch exactly elements
 * [0, tSize-1], the same set as the ascending loops. Indexing with i itself
 * would access one element past the end of the region and skip element 0.
 * With this indexing the ascending and descending checksums are equal.
 */
void Ddr3SpdTst(unsigned int  iStartAddr, unsigned int iSizeBytes )
{
    unsigned int i, j, tSize, tBgnMs, tEndMs, tUseMs, tMB, tSum;
    unsigned int *p32Dst = (unsigned int *)iStartAddr;
    unsigned char *p8Dst = (unsigned char *)iStartAddr;

    /* ======================= 32-bit write ======================= */
    UartPrintf("\n32bit Write Test:\n");

    /* ---- 32-bit write, ascending ---- */
    tSize = iSizeBytes / 4;
    tBgnMs = gGlobalT12;            /* start time, ms */
    tSum = 0;
    tMB = 0;
    for (j = 0; j < 10; j++)        /* 10 passes over the region */
    {
        for (i = 0; i < tSize; i++)
        {
            p32Dst[i] = i;
            tSum += i;
            tMB += 4;
        }
    }
    tEndMs = gGlobalT12;            /* end time, ms */
    tUseMs = tEndMs - tBgnMs;       /* unsigned subtraction stays correct across counter wrap */
    tMB = tMB / (1024U * 1024U);    /* bytes -> MB */
    UartPrintf("32bit 正序Write %d MB,use %d Ms; Spd = %d MPS. SUM=0x%X\n", tMB, tUseMs, Ddr3SpdMBps(tMB, tUseMs), tSum);

    /* ---- 32-bit write, descending ---- */
    tBgnMs = gGlobalT12;
    tSum = 0;
    tMB = 0;
    for (j = 0; j < 10; j++)
    {
        for (i = tSize; i != 0; i--)
        {
            p32Dst[i - 1] = i - 1;  /* i-1: stay inside [0, tSize-1] */
            tSum += i - 1;
            tMB += 4;
        }
    }
    tEndMs = gGlobalT12;
    tUseMs = tEndMs - tBgnMs;
    tMB = tMB / (1024U * 1024U);
    UartPrintf("32bit 倒序Write %d MB,use %d Ms; Spd = %d MPS. SUM=0x%X\n", tMB, tUseMs, Ddr3SpdMBps(tMB, tUseMs), tSum);

    /* ======================= 32-bit read ======================== */
    UartPrintf("\n32bit read Test:\n");

    /* ---- 32-bit read, ascending ---- */
    tBgnMs = gGlobalT12;
    tSum = 0;
    tMB = 0;
    for (j = 0; j < 10; j++)
    {
        for (i = 0; i < tSize; i++)
        {
            tSum += p32Dst[i];
            tMB += 4;
        }
    }
    tEndMs = gGlobalT12;
    tUseMs = tEndMs - tBgnMs;
    tMB = tMB / (1024U * 1024U);
    UartPrintf("32bit 正序read %d MB,use %d Ms; Spd = %d MPS. SUM=0x%X\n", tMB, tUseMs, Ddr3SpdMBps(tMB, tUseMs), tSum);

    /* ---- 32-bit read, descending ---- */
    tBgnMs = gGlobalT12;
    tSum = 0;
    tMB = 0;
    for (j = 0; j < 10; j++)
    {
        for (i = tSize; i != 0; i--)
        {
            tSum += p32Dst[i - 1];
            tMB += 4;
        }
    }
    tEndMs = gGlobalT12;
    tUseMs = tEndMs - tBgnMs;
    tMB = tMB / (1024U * 1024U);
    UartPrintf("32bit 倒序read %d MB,use %d Ms; Spd = %d MPS. SUM=0x%X\n", tMB, tUseMs, Ddr3SpdMBps(tMB, tUseMs), tSum);

    /* ======================== 8-bit write ======================= */
    UartPrintf("\n8bit Write Test:\n");

    /* ---- 8-bit write, descending ---- */
    tSize = iSizeBytes;
    tBgnMs = gGlobalT12;
    tSum = 0;
    tMB = 0;
    for (j = 0; j < 10; j++)
    {
        for (i = tSize; i != 0; i--)
        {
            p8Dst[i - 1] = (unsigned char)(i - 1);
            tSum += i - 1;
            tMB += 1;
        }
    }
    tEndMs = gGlobalT12;
    tUseMs = tEndMs - tBgnMs;
    tMB = tMB / (1024U * 1024U);
    UartPrintf("8bit 倒序Write %d MB,use %d Ms; Spd = %d MPS. SUM=0x%X\n", tMB, tUseMs, Ddr3SpdMBps(tMB, tUseMs), tSum);

    /* ---- 8-bit write, ascending ---- */
    tBgnMs = gGlobalT12;
    tSum = 0;
    tMB = 0;
    for (j = 0; j < 10; j++)
    {
        for (i = 0; i < tSize; i++)
        {
            p8Dst[i] = (unsigned char)i;
            tSum += i;
            tMB += 1;
        }
    }
    tEndMs = gGlobalT12;
    tUseMs = tEndMs - tBgnMs;
    tMB = tMB / (1024U * 1024U);
    UartPrintf("8bit 正序Write %d MB,use %d Ms; Spd = %d MPS. SUM=0x%X\n", tMB, tUseMs, Ddr3SpdMBps(tMB, tUseMs), tSum);

    /* ======================== 8-bit read ======================== */
    UartPrintf("\n8bit Read Test:\n");

    /* ---- 8-bit read, descending ---- */
    tBgnMs = gGlobalT12;
    tSum = 0;
    tMB = 0;
    for (j = 0; j < 10; j++)
    {
        for (i = tSize; i != 0; i--)
        {
            tSum += p8Dst[i - 1];
            tMB += 1;
        }
    }
    tEndMs = gGlobalT12;
    tUseMs = tEndMs - tBgnMs;
    tMB = tMB / (1024U * 1024U);
    UartPrintf("8bit 倒序Read %d MB,use %d Ms; Spd = %d MPS. SUM=0x%X\n", tMB, tUseMs, Ddr3SpdMBps(tMB, tUseMs), tSum);

    /* ---- 8-bit read, ascending ---- */
    tBgnMs = gGlobalT12;
    tSum = 0;
    tMB = 0;
    for (j = 0; j < 10; j++)
    {
        for (i = 0; i < tSize; i++)
        {
            tSum += p8Dst[i];
            tMB += 1;
        }
    }
    tEndMs = gGlobalT12;
    tUseMs = tEndMs - tBgnMs;
    tMB = tMB / (1024U * 1024U);
    UartPrintf("8bit 正序Read %d MB,use %d Ms; Spd = %d MPS. SUM=0x%X\n", tMB, tUseMs, Ddr3SpdMBps(tMB, tUseMs), tSum);
}

 

Shine:

请问DDR也cache使能了么?
processors.wiki.ti.com/…/Common_Issue_Resulting_in_Slow_External_Memory_Performance

user4467014:

回复 Shine:

谢谢回复, 您说的是MMU配置里的这个吗?
REGION regionDdr = {MMU_PGTYPE_SECTION, START_ADDR_DDR, NUM_SECTIONS_DDR,MMU_MEMTYPE_NORMAL_NON_SHAREABLE(MMU_CACHE_WT_NOWA,MMU_CACHE_WB_WA),MMU_REGION_NON_SECURE, MMU_AP_PRV_RW_USR_RW,(unsigned int*)pageTable};

Shine:

回复 user4467014:

是这里配。

Shine:

回复 Shine:

请关注下面的帖子。
https://e2e.ti.com/support/processors/f/791/t/798714

user4467014:

回复 Shine:

谢谢!

user4467014:

回复 user4467014:

英文E2E https://e2e.ti.com/support/processors/f/791/t/798714 有回复:

修改为: MMU_MEMTYPE_NORMAL_NON_SHAREABLE(MMU_CACHE_WB_WA,  MMU_CACHE_WB_WA)后,测试结果几乎没有变化;

修改为:MMU_MEMTYPE_NORMAL_NON_SHAREABLE(MMU_CACHE_WT_NOWA,  MMU_CACHE_WT_NOWA),测试结果更差;

DMA方式不适合,因为不是读取固定位置的数据;

谢谢!

 

yongqing wang:

回复 user4467014:

有没有在最新的Linux SDK上试试?

user4467014:

回复 yongqing wang:

感谢回复!

工程是基于StarterWare的裸机工程,没有跑linux;

谢谢!

赞(0)
未经允许不得转载:TI中文支持网 » AM335x裸机开发: 读DDR为什么比写慢很多?
分享到: 更多 (0)