TI中文支持网
TI专业的中文技术问题搜集分享网站

am5728的arm跑linux,dsp跑sysbios6.45,内存性能差

    AM5728的ARM核(1.5GHz)运行Linux系统,DSP核(750MHz)运行SYS/BIOS 6.45,DSP核工程的配置如下:

/*
 * Platform definition stored in Build.platformTable under the key
 * "ti.platforms.evmDRA7XX:dsp2".
 * NOTE(review): the segment objects referenced below (EXT_CODE_DSP2,
 * EXT_DATA_DSP2, EXT_HEAP_DSP2, TRACE_BUF, EXC_DATA, PM_DATA, CMEM, SR_0)
 * are defined outside this excerpt.
 */
bld文件:Build.platformTable["ti.platforms.evmDRA7XX:dsp2"] = {
    /* external memory map: one [name, segment] pair per memory segment */
    externalMemoryMap: [
        [ EXT_CODE_DSP2.name, EXT_CODE_DSP2 ],
        [ EXT_DATA_DSP2.name, EXT_DATA_DSP2 ],
        [ EXT_HEAP_DSP2.name, EXT_HEAP_DSP2 ],
        [ TRACE_BUF.name , TRACE_BUF ],
        [ EXC_DATA.name  , EXC_DATA ],
        [ PM_DATA.name  , PM_DATA ],
        [ CMEM.name   , CMEM ],
  [ SR_0.name   , SR_0 ],
    ],
    /*
     * Code, data and stack are assigned by segment name.
     * NOTE(review): presumably "EXT_CODE" / "EXT_DATA" equal
     * EXT_CODE_DSP2.name / EXT_DATA_DSP2.name -- confirm against the
     * segment definitions, which are not shown here.
     */
    codeMemory: "EXT_CODE",
    dataMemory: "EXT_DATA",
    stackMemory: "EXT_DATA",
    /* L1D, L1P and L2 mode strings for this platform instance */
    l1DMode: "32k",
    l1PMode: "32k",
    l2Mode: "128k"
};

cfg文件:

==========================================================================

/*
 * Copyright (c) 2015 Texas Instruments Incorporated – http://www.ti.com
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 *   Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the
 *   distribution.
 *
 *   Neither the name of Texas Instruments Incorporated nor the names of
 *   its contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *  ======== Dsp2.cfg ========
 *  Platform: DRA7XX_linux_elf
 *  Target: ti.targets.elf.C66
 */

/* root of the configuration object model */
var Program = xdc.useModule('xdc.cfg.Program');

/* application uses the following modules and packages */
xdc.useModule('xdc.runtime.Assert');
xdc.useModule('xdc.runtime.Diags');
xdc.useModule('xdc.runtime.Error');
xdc.useModule('xdc.runtime.Log');
xdc.useModule('xdc.runtime.Registry');
var Clock = xdc.useModule('ti.sysbios.knl.Clock');
var Swi = xdc.useModule('ti.sysbios.knl.Swi');

xdc.useModule('ti.sysbios.knl.Semaphore');
xdc.useModule('ti.sysbios.knl.Task');

xdc.loadPackage('ti.sdo.ipc.family.vayu');
xdc.useModule('ti.sdo.ipc.family.vayu.InterruptDsp');
xdc.loadPackage('ti.ipc.rpmsg');
xdc.loadPackage('ti.ipc.family.vayu');

/*
 *  ======== IPC Configuration ========
 */
xdc.useModule('ti.ipc.ipcmgr.IpcMgr');
var BIOS = xdc.useModule('ti.sysbios.BIOS');
/*
 * Register two IpcMgr entry points, by symbol name, as BIOS user startup
 * functions.
 * NOTE(review): presumably they run in registration order -- confirm.
 */
BIOS.addUserStartupFunction('&IpcMgr_ipcStartup');
BIOS.addUserStartupFunction('&IpcMgr_callIpcStart');

/* describe the processors in the system */
var MultiProc = xdc.useModule('ti.sdo.utils.MultiProc');
/*
 * "DSP2" is passed as the first argument and also appears in the name list.
 * NOTE(review): presumably the first argument selects the local processor
 * and the list order must match the other cores' configuration -- confirm.
 */
MultiProc.setConfig("DSP2", ["HOST", "IPU2", "IPU1", "DSP2", "DSP1"]);

/* GateMP host support */
var GateMP = xdc.useModule('ti.sdo.ipc.GateMP');
GateMP.hostSupport = true;

/* shared region configuration */
var SharedRegion = xdc.useModule('ti.sdo.ipc.SharedRegion');

/* configure SharedRegion #0 (IPC) */
var SR0Mem = Program.cpu.memoryMap["SR_0"];

SharedRegion.setEntryMeta(0,
    new SharedRegion.Entry({
        name:           "SR0",
        base:           SR0Mem.base,
        len:            SR0Mem.len,
        ownerProcId:    MultiProc.getIdMeta("DSP2"),
        cacheEnable:    true,
        isValid:        true
    })
);

/* Override the default resource table with my own to add SR0 */
var Resource = xdc.useModule('ti.ipc.remoteproc.Resource');
Resource.customTable = true;

/*
 * Cache MAR attributes for two address ranges (base, length, attribute).
 * The accompanying post describes these settings as marking the memory
 * cacheable and prefetchable.
 */
var Cache = xdc.useModule('ti.sysbios.family.c66.Cache'); /* other attribute values tried by the author: Mar_DISABLE, Mar_ENABLE */
/* 0x9e000000, length 0x00800000 (8 MB): PC | WTE | PCX | PFX */
Cache.setMarMeta(0x9e000000, 0x00800000, Cache.PC | Cache.WTE | Cache.PCX | Cache.PFX);
/*
 * 0xA0400000, length 0x02000000 (32 MB): Mar_ENABLE only.
 * NOTE(review): unlike the range above, Cache.PFX is not passed here; if
 * prefetch is wanted for this range as well, confirm whether Mar_ENABLE
 * includes it.
 */
Cache.setMarMeta(0xA0400000, 0x02000000, Cache.Mar_ENABLE);

/*
 *  ======== SYS/BIOS Configuration ========
 */
if (Program.build.profile == "debug") {
    BIOS.libType = BIOS.LibType_Debug;
} else {
    BIOS.libType = BIOS.LibType_Custom;
}

/* no rts heap */
Program.argSize = 100;  /* minimum size */
Program.stack = 0x1000;

xdc.useModule('ti.sysbios.xdcruntime.GateThreadSupport');
var GateSwi   = xdc.useModule('ti.sysbios.gates.GateSwi');

var Task = xdc.useModule('ti.sysbios.knl.Task');
Task.common$.namedInstance = true;

/* default memory heap: a HeapMem instance of size 0x80000 (512 KB) */
var Memory = xdc.useModule('xdc.runtime.Memory');
var HeapMem = xdc.useModule('ti.sysbios.heaps.HeapMem');
var heapMemParams = new HeapMem.Params();
heapMemParams.size = 0x80000;
Memory.defaultHeapInstance = HeapMem.create(heapMemParams);

/*
 * create a heap for MessageQ messages:
 * 256 blocks of 512 bytes each (128 KB in total), alignment 8
 */
var HeapBuf = xdc.useModule('ti.sysbios.heaps.HeapBuf');
var params = new HeapBuf.Params;
params.align = 8;
params.blockSize = 512;
params.numBlocks = 256;
var msgHeap = HeapBuf.create(params);

/* register the message heap with MessageQ under heap id 0 */
var MessageQ  = xdc.useModule('ti.sdo.ipc.MessageQ');
MessageQ.registerHeapMeta(msgHeap, 0);

/* Setup MessageQ transport: TransportRpmsgSetup is installed as the setup proxy */
var VirtioSetup = xdc.useModule('ti.ipc.transports.TransportRpmsgSetup');
MessageQ.SetupTransportProxy = VirtioSetup;

/* Setup NameServer remote proxy: NameServerRemoteRpmsg is installed as the setup proxy */
var NameServer = xdc.useModule("ti.sdo.utils.NameServer");
var NsRemote = xdc.useModule("ti.ipc.namesrv.NameServerRemoteRpmsg");
NameServer.SetupProxy = NsRemote;

/*
 * Enable Memory Translation module that operates on the BIOS Resource Table
 * The Resource module is acquired a second time here (it is also acquired
 * earlier in this file); this statement only adds the load segment setting.
 */
var Resource = xdc.useModule('ti.ipc.remoteproc.Resource');
Resource.loadSegment = "EXT_CODE";

/*  Use SysMin because trace buffer address is required for Linux/QNX
 *  trace debug driver, plus provides better performance.
 */
var System = xdc.useModule('xdc.runtime.System');
var SysMin = xdc.useModule('ti.trace.SysMin');
System.SupportProxy = SysMin;
SysMin.bufSize  = 0x8000;

Program.sectMap[".tracebuf"] = "TRACE_BUF";
Program.sectMap[".errorbuf"] = "EXC_DATA";

/* ---------------- TICK ---------------- */
/* The Clock tick-source overrides below are commented out, so no tick source is set in this section. */
//var Clock = xdc.useModule('ti.sysbios.knl.Clock');
//Clock.tickSource = Clock.TickSource_NULL;
//Clock.tickSource = Clock.TickSource_USER;
/* Configure BIOS clock source as GPTimer5 */
//Clock.timerId = 0;

var Timer = xdc.useModule('ti.sysbios.timers.dmtimer.Timer');

/* Skip the Timer frequency verification check. Need to remove this later */
Timer.checkFrequency = false;

/* Match this to the SYS_CLK frequency sourcing the dmTimers.
 * Not needed once the SYS/BIOS family settings is updated.
 * The value set below is 19200000 (hi word 0). */
Timer.intFreq.hi = 0;
Timer.intFreq.lo = 19200000;

/* The explicit Timer.create() for the Clock tick is commented out as well. */
//var timerParams = new Timer.Params();
//timerParams.period = Clock.tickPeriod;
//timerParams.periodType = Timer.PeriodType_MICROSECS;
/* Switch off Software Reset to make the below settings effective */
//timerParams.tiocpCfg.softreset = 0x0;
/* Smart-idle wake-up-capable mode */
//timerParams.tiocpCfg.idlemode = 0x3;
/* Wake-up generation for Overflow */
//timerParams.twer.ovf_wup_ena = 0x1;
//Timer.create(Clock.timerId, Clock.doTick, timerParams);

/*
 *  ======== Power Management Configuration ========
 */
/* Modules used in Power Management (currently disabled: the block below is commented out) */
/*xdc.loadPackage('ti.pm');

var Power = xdc.useModule('ti.sysbios.family.c66.vayu.Power');
Power.loadSegment = "PM_DATA";
*/

/*
 * Register VirtQueue_cacheWb, by symbol name, as an Idle function.
 * NOTE(review): the original comment said it "periodically flushes the
 * unicache"; the name suggests a cache write-back, but its implementation
 * is not shown here -- confirm what it writes back on this core.
 */
var Idle = xdc.useModule('ti.sysbios.knl.Idle');
Idle.addFunc('&VirtQueue_cacheWb');

/*
 *  ======== Instrumentation Configuration ========
 */

/* system logger: a LoggerSys instance created with default parameters */
var LoggerSys = xdc.useModule('xdc.runtime.LoggerSys');
var LoggerSysParams = new LoggerSys.Params();
var Defaults = xdc.useModule('xdc.runtime.Defaults');

/* Enable Logger: install it as the default logger for all modules */
Defaults.common$.logger = LoggerSys.create(LoggerSysParams);

/* enable runtime Diags_setMask() for non-XDC spec'd modules */
var Diags = xdc.useModule('xdc.runtime.Diags');
Diags.setMaskEnabled = true;

/* override diags mask for selected modules: ENTRY, EXIT and INFO are set to RUNTIME_ON for xdc.runtime.Main */
xdc.useModule('xdc.runtime.Main');
Diags.setMaskMeta("xdc.runtime.Main",
    Diags.ENTRY | Diags.EXIT | Diags.INFO, Diags.RUNTIME_ON);

/* Registry: the six diags categories below are all set to RUNTIME_OFF */
var Registry = xdc.useModule('xdc.runtime.Registry');
Registry.common$.diags_ENTRY = Diags.RUNTIME_OFF;
Registry.common$.diags_EXIT  = Diags.RUNTIME_OFF;
Registry.common$.diags_INFO  = Diags.RUNTIME_OFF;
Registry.common$.diags_USER1 = Diags.RUNTIME_OFF;
Registry.common$.diags_LIFECYCLE = Diags.RUNTIME_OFF;
Registry.common$.diags_STATUS = Diags.RUNTIME_OFF;

/* Main: ASSERT and INTERNAL diagnostics are set to ALWAYS_ON */
var Main = xdc.useModule('xdc.runtime.Main');
Main.common$.diags_ASSERT = Diags.ALWAYS_ON;
Main.common$.diags_INTERNAL = Diags.ALWAYS_ON;

//==============================================================================
/* ================ Driver configuration ================ */

/* Load the Osal package */
var osType           = "tirtos";
var Osal             = xdc.loadPackage('ti.osal');
Osal.Settings.osType = osType;

/*use CSL package*/
var socType           = "am572x";
var Csl = xdc.loadPackage('ti.csl');
Csl.Settings.deviceType = socType;

/* Load the gpio package */
var GPIO               = xdc.loadPackage('ti.drv.gpio');
GPIO.Settings.socType  = socType;

========================================================================================

主要问题:

        1、程序中,使用memcpy拷贝512Byte时耗时4us左右,速度125MB/s,内存性能太差了,内存的频率都532MHz,不知道还需要怎样设置工程才能尽量达到理论速度。有没有关于cache(L1P/L1D/L2)、内存性能测试的例程,想查看一下是怎样配置工程以便达到最好的性能。

2、有关cache的应该查看哪些文档?我查看了csl的<ti/csl/csl_cacheAux.h>,<ti/csl/csl_cache.h>,<ti/csl/csl_xmcAux.h>等文件。不知道有没有可以prefetch某一段内存、变量或函数的指令,能否在prefetch后将其锁定在cache(L1P/L1D/L2)中,等我对该内存、变量或函数使用完成后再解除锁定、释放cache。

yongqing wang:

内存共享可以解决内存拷贝的问题

Denny Yang99373:

估计不是内存性能的问题,是IPC通信效率问题。COPY 1K 2K的数据估计还是4us左右。
要想解决这个问题,就需要自己写驱动

yongqing wang:

回复 Denny Yang99373:

自己写驱动这个难度就大了

Denny Yang99373:

回复 yongqing wang:

e2echina.ti.com/…/400159
可以看看这个链接

Kevin Le82:

回复 Denny Yang99373:

    非常感谢各位的热心帮助。dsp核心使用的内存是由linux分配好,dsp程序按照固件加载的,内存定义参考图片:

因为数据是由fpga通过pcie传输到特定位置的,所以需要将该数据拷贝到另外的地方使用,否则会有被覆盖的问题。

已经启用了L1P/L1D/L2的cache功能,   

    l1DMode: "32k",    l1PMode: "32k",    l2Mode: "128k"

内存也指定了可以cache和prefetch:

//Cache.PC | Cache.WTE | Cache.PCX | Cache.PFX);
var Cache = xdc.useModule('ti.sysbios.family.c66.Cache');//Mar_DISABLE  Mar_ENABLE
Cache.setMarMeta(0x9e000000, 0x00800000, Cache.PC | Cache.WTE | Cache.PCX | Cache.PFX);
Cache.setMarMeta(0xA0400000, 0x02000000, Cache.Mar_ENABLE);

就是使用简单的memcpy函数,连续执行以下拷贝:

memcpy(a,b,2k);//执行这个时间会很长,10us多

memcpy(c,a,2k);//接着执行这个,才530个clock左右(530/750MHz≈0.7us,不到1us)

感觉就是第一次拷贝时,数据没有连续prefetch,之后接着拷贝时,因为都在cache中所以就很快。

有cache的相关操作函数吗?没有看到prefetch、lock、unlock等控制函数,只有wb、inv等函数

Denny Yang99373:

回复 Kevin Le82:

cache的prefetch属性是在cache初始化的时候设置的,运行起来只有wb inv函数

赞(0)
未经允许不得转载:TI中文支持网 » am5728的arm跑linux,dsp跑sysbios6.45,内存性能差
分享到: 更多 (0)