Difference between revisions of "Extending Disassembly with Ozone"

From SEGGER Wiki
Jump to: navigation, search
(References)
(Plugin Example)
(37 intermediate revisions by 2 users not shown)
Line 5: Line 5:
 
== Introduction ==
 
== Introduction ==
   
With the advent of custom instructions in Armv8-A in mid 2019,
+
With RISC-V and the advent of custom instructions in Armv8-A in mid 2019,
 
all of Ozone's supported target architectures now allow MCU core vendors
 
all of Ozone's supported target architectures now allow MCU core vendors
 
to add custom instructions to their design.
 
to add custom instructions to their design.
Line 67: Line 67:
   
 
Next to the predefined script functions, users are free to add their own functions
 
Next to the predefined script functions, users are free to add their own functions
to disassembly support scripts in order to structure the code.
+
to disassembly plugins in order to structure the code.
   
== Example Implementation ==
+
== Script Functions ==
   
This section provides an example implementation which adds support
+
This section demonstrates the use of each script function
  +
on the basis of an example implementation.
  +
The example implementation adds support
 
for a custom instruction on a RI5CY RISC-V MCU core.
 
for a custom instruction on a RI5CY RISC-V MCU core.
   
Line 103: Line 105:
 
function init() {
 
function init() {
   
var InstLen;
+
var aInst = new Array();
var InstData = new Array();
+
var aMask = new Array();
var InstMask = new Array();
 
 
//
 
//
 
// Mark the instruction "ADDI sp, sp, -16" (0x1141) as overridden by this plugin
 
// Mark the instruction "ADDI sp, sp, -16" (0x1141) as overridden by this plugin
 
//
 
//
InstLen = 0x2;
+
aInst[0] = 0x41;
InstData[0] = 0x41;
+
aInst[1] = 0x11;
  +
aMask[0] = 0xFF; // all encoding bits are relevant
InstData[1] = 0x11;
 
InstMask[0] = 0xFF; // all encoding bits are relevant
+
aMask[1] = 0xFF; // all encoding bits are relevant
InstMask[1] = 0xFF; // all encoding bits are relevant
 
   
Debug.enableOverrideInst(InstLen, InstData, InstMask);
+
Debug.enableOverrideInst(aInst, aMask);
   
 
return 0;
 
return 0;
Line 140: Line 140:
 
*
 
*
 
* Function Parameters
 
* Function Parameters
* InstAddr instruction address (type: U64)
+
* Addr instruction address (type: U64)
* InstLen instruction byte length (type: U32)
+
* aInst instruction bytes (type: byte array)
* InstData instruction bytes (type: byte array)
+
* Flags basic information about the instruction required for analysis.
  +
* Interpretation depends on architecture.
* Flags basic information about the instruction required for analysis.
 
* Interpretation depends on architecture.
 
 
*
 
*
 
* Return Value
 
* Return Value
Line 151: Line 150:
 
* mnemonic and a single tab before a possible trailing comment
 
* mnemonic and a single tab before a possible trailing comment
 
*/
 
*/
function printInstAsm(InstAddr, InstLen, InstData, Flags) {
+
function printInstAsm(Addr, aInst, Flags) {
   
if (InstLen == 4) {
+
if (aInst.length == 4) {
   
var Encoding = (InstData[3] << 24) | (InstData[2] << 16) |
+
var Encoding = (aInst[3] << 24) | (aInst[2] << 16) |
(InstData[1] << 8) | InstData[0];
+
(aInst[1] << 8) | aInst[0];
   
 
if ((Encoding & 0x707F) == 0x2063) { // opcode == "P.BEQIMM" ?
 
if ((Encoding & 0x707F) == 0x2063) { // opcode == "P.BEQIMM" ?
Line 163: Line 162:
 
//
 
//
 
// Operation:
 
// Operation:
// If (Rs1 == Imm5) branch to InstAddr + (Imm12 << 1).
+
// If (Rs1 == Imm5) branch to Addr + (Imm12 << 1).
 
//
 
//
 
// Encoding = {Imm12 | Imm5 | rs1 | funct3 | Imm12 | opcode}
 
// Encoding = {Imm12 | Imm5 | rs1 | funct3 | Imm12 | opcode}
Line 171: Line 170:
 
// - - - 010 - 1100011
 
// - - - 010 - 1100011
 
//
 
//
var a = (Encoding & 0x80) >> 7; // Encoding[7:7]
+
var a = (Encoding >> 7) & 1;
var b = (Encoding & 0xF00) >> 8; // Encoding[11:8]
+
var b = (Encoding >> 8) & 0xF;
var c = (Encoding & 0x7E000000) >> 25; // Encoding[30:25]
+
var c = (Encoding >> 25) & 0x3F;
var d = (Encoding & 0x80000000) >> 25; // Encoding[31]
+
var d = (Encoding >> 31) & 1;
var Imm5 = (Encoding & 0x1F00000) >> 20;
+
var Imm5 = (Encoding >> 20) & 0x1F;
var Rs1 = (Encoding & 0xF8000) >> 15;
+
var Rs1 = (Encoding >> 15) & 0x1F;
   
 
var Imm12 = (b | (c << 4) | (a << 11) | (d << 12)) << 1;
 
var Imm12 = (b | (c << 4) | (a << 11) | (d << 12)) << 1;
   
var sSymbol = Debug.getSymbol(InstAddr + Imm12);
+
var sSymbol = Debug.getSymbol(Addr + Imm12);
   
 
var sInst = "P.BEQIMM\t" + getRegName(Rs1) + ", " + Imm5 + ", " + Imm12;
 
var sInst = "P.BEQIMM\t" + getRegName(Rs1) + ", " + Imm5 + ", " + Imm12;
Line 191: Line 190:
 
}
 
}
   
} else if (InstLen == 2) {
+
} else if (aInst.length == 2) {
var Encoding = (InstData[1] << 8) | InstData[0];
+
var Encoding = (aInst[1] << 8) | aInst[0];
 
if (Encoding == 0x1141) { // "ADDI sp, sp, -16" ?
 
if (Encoding == 0x1141) { // "ADDI sp, sp, -16" ?
 
return "ADDI\tsp, sp, -0x10";
 
return "ADDI\tsp, sp, -0x10";
Line 234: Line 233:
 
*
 
*
 
* Function Parameters
 
* Function Parameters
* InstAddr instruction address (type: U64)
+
* Addr instruction address (type: U64)
* InstLen instruction byte length (type: U32)
+
* aInst instruction data bytes (type: byte array)
* InstData instruction data bytes (type: byte array)
+
* Flags basic information about the instruction required for analysis.
  +
* Interpretation depends on architecture (type: U32)
* Flags basic information about the instruction required for analysis.
 
* Interpretation depends on architecture (type: U32)
 
 
*
 
*
 
* Return Value
 
* Return Value
* undefined: if the input instruction is not supported by this plugin
+
* undefined: if the input instruction is not supported by this plugin
* InstInfo: a javascript object corresponding to the following C structure:
+
* InstInfo: a javascript object corresponding to the following C structure:
 
*
 
*
 
* struct INST_INFO {
 
* struct INST_INFO {
Line 248: Line 246:
 
* U32 Size; // instruction byte size
 
* U32 Size; // instruction byte size
 
* U64 AccessAddr; // branch address or memory access address
 
* U64 AccessAddr; // branch address or memory access address
* int StackAdjust; // Difference between SP before and after instruction execution
+
* int StackAdjust; // Difference of SP before and after instruction execution
 
* U32 Flags; // binary instruction information
 
* U32 Flags; // binary instruction information
 
* }
 
* }
*
 
* Notes
 
* (1) Example input
 
*
 
* InstAddr 0x20000192
 
* InstLen 4
 
* InstData 63 2A 36 06 ("P.BEQIMM a2, 3, 116")
 
* Flags 0
 
 
*/
 
*/
function getInstInfo(InstAddr, InstLen, InstData, Flags) {
+
function getInstInfo(Addr, aInst, Flags) {
   
if (InstLen == 4) {
+
if (aInst.length == 4) {
   
var Encoding = (InstData[3] << 24) | (InstData[2] << 16) | (InstData[1] << 8) | InstData[0];
+
var Encoding = (aInst[3] << 24) | (aInst[2] << 16) | (aInst[1] << 8) | aInst[0];
   
 
if ((Encoding & 0x707F) == 0x2063) { // opcode == "P.BEQIMM" ?
 
if ((Encoding & 0x707F) == 0x2063) { // opcode == "P.BEQIMM" ?
Line 271: Line 261:
 
//
 
//
 
// Operation:
 
// Operation:
// If (Rs1 == Imm5) branch to InstAddr + (Imm12 << 1).
+
// If (Rs1 == Imm5) branch to Addr + (Imm12 << 1).
 
//
 
//
 
// Encoding = {Imm12 | Imm5 | rs1 | funct3 | Imm12 | opcode}
 
// Encoding = {Imm12 | Imm5 | rs1 | funct3 | Imm12 | opcode}
Line 279: Line 269:
 
// - - - 010 - 1100011
 
// - - - 010 - 1100011
 
//
 
//
var a = (Encoding & 0x80) >> 7; // Encoding[7:7]
+
var a = (Encoding >> 7) & 1;
var b = (Encoding & 0xF00) >> 8; // Encoding[11:8]
+
var b = (Encoding >> 8) & 0xF;
var c = (Encoding & 0x7E000000) >> 25; // Encoding[30:25]
+
var c = (Encoding >> 25) & 0x3F;
var d = (Encoding & 0x80000000) >> 25; // Encoding[31]
+
var d = (Encoding >> 31) & 1;
   
 
var Imm12 = (b | (c << 4) | (a << 11) | (d << 12)) << 1;
 
var Imm12 = (b | (c << 4) | (a << 11) | (d << 12)) << 1;
Line 292: Line 282:
 
InstInfo.Mode = 0;
 
InstInfo.Mode = 0;
 
InstInfo.StackAdjust = 0;
 
InstInfo.StackAdjust = 0;
InstInfo.AccessAddr = InstAddr + Imm12;
+
InstInfo.AccessAddr = Addr + Imm12;
 
InstInfo.Flags = 0x1110; // IsBranch | IsConditional | IsFixedAddress
 
InstInfo.Flags = 0x1110; // IsBranch | IsConditional | IsFixedAddress
   
Line 299: Line 289:
 
} // if opcode == "P.BEQIMM"
 
} // if opcode == "P.BEQIMM"
   
} // if InstLen == 4
+
} // if aInst.length == 4
   
 
return undefined;
 
return undefined;
Line 351: Line 341:
 
The command must be executed from script function <code>init</code>.
 
The command must be executed from script function <code>init</code>.
   
<code>Debug.enableOverrideInst(InstLen, Encoding, Mask)</code>
+
<code>Debug.enableOverrideInst(aInst, aMask)</code>
   
 
{| class="wikitable"
 
{| class="wikitable"
Line 358: Line 348:
 
! Type
 
! Type
 
|-
 
|-
  +
| aInst
|InstLen
 
| Instruction length
 
| U32
 
|-
 
|Encoding
 
 
| Instruction bytes
 
| Instruction bytes
 
| byte array
 
| byte array
 
|-
 
|-
| Mask
+
| aMask
| Instruction bits significant for matching. This argument must have the same byte size as argument <i>Encoding</i>. The argument effectively allows users to override multiple instructions at once. This is commonly desirable when overriding all instructions of a particular type.
+
| Instruction bits significant for matching. This argument must have the same byte size as argument <i>aInst</i>. The argument effectively allows users to override multiple instructions at once. This is commonly desirable when overriding all instructions of a particular type.
 
| byte array
 
| byte array
 
|}
 
|}
Line 435: Line 421:
 
This means that a JavaScript plugin can be written once and then used
 
This means that a JavaScript plugin can be written once and then used
 
with both software products.
 
with both software products.
  +
  +
== Plugin Example ==
  +
  +
Shown below is the complete implementation of a disassembly plugin
  +
which adds support for all custom instructions
  +
of the RI5CY MCU core, PULP platform.
  +
  +
<syntaxhighlight lang=javascript>
  +
/*********************************************************************
  +
* SEGGER MICROCONTROLLER GmbH *
  +
* Solutions for real time microcontroller applications *
  +
**********************************************************************
  +
* *
  +
* (c) 1995 - 2019 SEGGER Microcontroller GmbH *
  +
* *
  +
* www.segger.com Support: support@segger.com *
  +
* *
  +
**********************************************************************
  +
* *
  +
* Please note: *
  +
* *
  +
* Knowledge of this file may under no circumstances *
  +
* be used to write a similar product *
  +
* *
  +
* Thank you for your fairness ! *
  +
* *
  +
**********************************************************************
  +
  +
----------------------------------------------------------------------
  +
File : Disassembly_RI5CY.js
  +
Purpose : Ozone disassembly javascript plugin for the RI5CY core
  +
Literature : [1] RI5CY: User Manual
  +
Revision : 1.0
  +
---------------------------END-OF-HEADER------------------------------
  +
*/
  +
  +
/*********************************************************************
*
*       init
*
*  Function Description
*    Plugin entry point. Ozone calls this function once the script
*    has been loaded (i.e. after command "Project.SetDisassemblyPlugin"
*    was executed).
*
*    This is the place to issue "Debug.enableOverrideInst" commands
*    for instructions whose default disassembly the plugin replaces.
*    This implementation registers no overrides.
*
*  Return Value
*     0:  OK
*    -1:  error
*/
function init() {
  var Result = 0; // nothing to register, so initialization always succeeds

  return Result;
}
  +
  +
/*********************************************************************
*
*       printInstAsm
*
*  Function Description
*    Prints the assembly code of an instruction of the RI5CY
*    ISA extension.
*
*    The instruction is classified by its major opcode (bits 6:0) and
*    then by funct3 / funct7 / the top two encoding bits. Encodings that
*    match no known mnemonic yield undefined so that the caller can fall
*    back to its default disassembly.
*
*  Function Parameters
*    Addr   instruction address (type: integer)
*    aInst  instruction data bytes, lowest-order byte first (type: byte array)
*    Flags  basic information about the instruction required for analysis.
*           Not evaluated by this implementation.
*
*  Return Value
*    undefined:  if the input instruction is not supported by this plugin or an error occurred
*    string:     assembly code string containing a single tab after the instruction
*                mnemonic and a single tab before a possible trailing comment
*
*  Additional Information
*    Relies on the helper functions regName and immName of this plugin
*    and on the Ozone command Debug.getSymbol.
*/
function printInstAsm(Addr, aInst, Flags) {

  if (aInst.length != 4) {
    return undefined; // the RI5CY ISA-extension has a fixed instruction length of 32 bit
  }
  var Encoding = (aInst[3] << 24) | (aInst[2] << 16) | (aInst[1] << 8) | aInst[0];
  //
  // Instruction bitfields
  //
  var Opcode = (Encoding >>  0) & 0x7F;
  var Rd     = (Encoding >>  7) & 0x1F;
  var Rs1    = (Encoding >> 15) & 0x1F;
  var Rs2    = (Encoding >> 20) & 0x1F;
  var Funct3 = (Encoding >> 12) & 0x7;
  var Funct7 = (Encoding >> 25) & 0x7F;
  var Imm5   = (Encoding >> 20) & 0x1F;
  var Imm7   = (Encoding >> 25) & 0x7F;
  var Imm12  = (Encoding >> 20) & 0xFFF;
  var Ls3    = (Encoding >> 25) & 0x1F;
  var Ls2    = (Encoding >> 20) & 0x1F;
  var F2     = (Encoding >> 30) & 0x3;
  var L      = (Encoding >>  7) & 0x1;

  var sOpcode = "";
  var sInst   = "";
  var sSymbol = "";
  var sRd     = regName(Rd);
  var sRs1    = regName(Rs1);
  var sRs2    = regName(Rs2);
  //
  // Returns the mnemonic stored in Table under Key,
  // or an empty string when the encoding is not defined.
  //
  function pick(Table, Key) {
    return Table.hasOwnProperty(Key) ? Table[Key] : "";
  }

  switch (Opcode) {

  case 11: // Loads with Post-Increment

    if (Funct3 == 7) { // Register-Register Loads with Post-Increment (selected by funct7)
      sOpcode = pick({ 0: "P.LB", 8: "P.LH", 16: "P.LW", 32: "P.LBU", 40: "P.LHU" }, Funct7);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs2 + "(" + sRs1 + "!)";
      }
    } else {           // Register-Immediate Loads with Post-Increment (selected by funct3)
      sOpcode = pick({ 0: "P.LB", 1: "P.LH", 2: "P.LW", 4: "P.LBU", 5: "P.LHU" }, Funct3);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + immName(Imm12) + "(" + sRs1 + "!)";
      }
    }
    break;

  case 3: // Register-Register Loads

    if (Funct3 == 7) { // any other funct3 is an invalid encoding for this group
      sOpcode = pick({ 0: "P.LB", 8: "P.LH", 16: "P.LW", 32: "P.LBU", 40: "P.LHU" }, Funct7);
    }
    if (sOpcode.length) {
      sInst = sOpcode + "\t" + sRd + ", " + sRs2 + "(" + sRs1 + ")";
    }
    break;

  case 43: // Stores with Post-Increment

    if (Funct3 <= 2) { // Register-Immediate Stores with Post-Increment
      sOpcode = pick({ 0: "P.SB", 1: "P.SH", 2: "P.SW" }, Funct3);
      //
      // S-type immediate: bits 31:25 hold Imm[11:5], bits 11:7 hold Imm[4:0].
      // Bits 24:20 are the source register (printed below) and must not
      // be folded into the immediate.
      //
      var StoreImm = (Imm7 << 5) | Rd;

      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRs2 + ", " + immName(StoreImm) + "(" + sRs1 + "!)";
      }
    } else {           // Register-Register Stores with Post-Increment
      sOpcode = pick({ 4: "P.SB", 5: "P.SH", 6: "P.SW" }, Funct3);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRs2 + ", " + sRd + "(" + sRs1 + "!)"; // offset register is encoded in bits 11:7
      }
    }
    break;

  case 35: // Register-Register Stores

    if (Funct7 == 0) { // any other funct7 is an invalid encoding for this group
      sOpcode = pick({ 4: "P.SB", 5: "P.SH", 6: "P.SW" }, Funct3);
    }
    if (sOpcode.length) {
      sInst = sOpcode + "\t" + sRs2 + ", " + sRd + "(" + sRs1 + ")";
    }
    break;

  case 123: // Hardware Loops

    switch (Funct3) {
    case 0:  sInst = "LP.STARTI" + "\t" + L + ", " + immName(Imm12);                 break;
    case 1:  sInst = "LP.ENDI"   + "\t" + L + ", " + immName(Imm12);                 break;
    case 2:  sInst = "LP.COUNT"  + "\t" + L + ", " + sRs1;                           break;
    case 3:  sInst = "LP.COUNTI" + "\t" + L + ", " + immName(Imm12);                 break;
    case 4:  sInst = "LP.SETUP"  + "\t" + L + ", " + sRs1 + ", " + immName(Imm12);   break;
    // NOTE(review): operand selection of LP.SETUPI is reproduced unchanged;
    // verify the immediate widths and order against the RI5CY user manual.
    case 5:  sInst = "LP.SETUPI" + "\t" + L + ", " + immName(Imm5) + ", " + immName(Rs1); break;
    default: sInst = "";                                                              break;
    }
    if (((Encoding >> 8) & 0xF) != 0) {
      sInst = ""; // invalid encoding: bits 11:8 must be zero
    }
    break;

  case 51: // Bit Manipulation, General ALU and Multiply-Accumulate ops

    if (F2 == 3) {                                // Bit Manipulation, one register and two immediates

      sOpcode = pick({ 0: "P.EXTRACT", 1: "P.EXTRACTU", 2: "P.INSERT", 3: "P.BCLR", 4: "P.BSET" }, Funct3);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1 + ", " + immName(Ls3) + ", " + immName(Ls2);
      }

    } else if (Funct7 == 64) {                    // Bit Manipulation, 2 registers

      sOpcode = pick({ 0: "P.EXTRACTR", 1: "P.EXTRACTUR", 2: "P.INSERTR", 3: "P.BCLRR", 4: "P.BSETR" }, Funct3);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1 + ", " + sRs2;
      }

    } else if (Funct7 == 4) {                     // Bit Manipulation, ROR

      if (Funct3 == 5) {
        sInst = "P.ROR" + "\t" + sRd + ", " + sRs1 + ", " + sRs2;
      }

    } else if (Funct7 == 8) {                     // Bit Manipulation (0..3) & General ALU (4..7), one source register

      if (Rs2 == 0) { // a non-zero rs2 field is an invalid encoding for this group
        sOpcode = pick({ 0: "P.FF1",   1: "P.FL1",   2: "P.CLB",   3: "P.CNT",
                         4: "P.EXTHS", 5: "P.EXTHZ", 6: "P.EXTBS", 7: "P.EXTBZ" }, Funct3);
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1;
      }

    } else if ((Funct7 == 2) && (Funct3 == 0)) {  // General ALU ops, ABS

      if (Rs2 == 0) {
        sInst = "P.ABS" + "\t" + sRd + ", " + sRs1;
      }

    } else if (Funct7 == 2) {                     // General ALU ops, two registers

      sOpcode = pick({ 2: "P.SLET", 3: "P.SLETU", 4: "P.MIN", 5: "P.MINU", 6: "P.MAX", 7: "P.MAXU" }, Funct3);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1 + ", " + sRs2;
      }

    } else if (Funct7 == 10) {                    // General ALU ops, clipping

      switch (Funct3) {
      case 1:  sInst = "P.CLIP"   + "\t" + sRd + ", " + sRs1 + ", " + immName(Ls2); break;
      case 2:  sInst = "P.CLIPU"  + "\t" + sRd + ", " + sRs1 + ", " + immName(Ls2); break;
      case 3:  sInst = "P.CLIPR"  + "\t" + sRd + ", " + sRs1 + ", " + sRs2;         break; // register form of P.CLIP
      case 4:  sInst = "P.CLIPUR" + "\t" + sRd + ", " + sRs1 + ", " + sRs2;         break;
      default: sInst = "";                                                           break;
      }

    } else if (Funct7 == 33) {                    // Multiply Accumulate ops

      sOpcode = pick({ 0: "P.MAC", 1: "P.MSU" }, Funct3);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1 + ", " + sRs2;
      }
    }
    break;

  case 91: // General ALU ops with normalization / rounding, Multiply ops

    var IsAddSub = (Funct3 == 2) || (Funct3 == 3) || (Funct3 == 6) || (Funct3 == 7);
    var Sel      = (F2 << 3) | Funct3; // combined selector used by the mnemonic tables below

    if (IsAddSub && ((F2 == 0) || (F2 == 2))) {   // General ALU ops, two registers and 1 immediate

      sOpcode = pick({  2: "P.ADDN",  18: "P.ADDUN",  6: "P.ADDRN", 22: "P.ADDURN",
                        3: "P.SUBN",  19: "P.SUBUN",  7: "P.SUBRN", 23: "P.SUBURN" }, Sel);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1 + ", " + sRs2 + ", " + immName(Ls3);
      }

    } else if (IsAddSub) {                        // General ALU ops, two registers (F2 is 1 or 3)

      if (Ls3 == 0) { // a non-zero Ls3 field is an invalid encoding for this group
        sOpcode = pick({ 10: "P.ADDNR", 26: "P.ADDUNR", 14: "P.ADDRNR", 30: "P.ADDURNR",
                         11: "P.SUBNR", 27: "P.SUBUNR", 15: "P.SUBRNR", 31: "P.SUBURNR" }, Sel);
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1 + ", " + sRs2;
      }

    } else if ((Ls3 == 0) && (Funct3 == 0)) {     // Multiply, 2 registers

      sOpcode = pick({ 0: "P.MULU", 1: "P.MULHHU", 2: "P.MULS", 3: "P.MULHHS" }, F2);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1 + ", " + sRs2;
      }

    } else {                                      // Multiply / Multiply Accumulate, 2 registers and 1 immediate

      sOpcode = pick({ 16: "P.MULSN",  24: "P.MULHHSN",  20: "P.MULSRN",  28: "P.MULHHSRN",
                        0: "P.MULUN",   8: "P.MULHHUN",   4: "P.MULURN",  12: "P.MULHHURN",
                       17: "P.MACSN",  25: "P.MACHHSN",  21: "P.MACSRN",  29: "P.MACHHSRN",
                        1: "P.MACUN",   9: "P.MACHHUN",   5: "P.MACURN",  13: "P.MACHHURN" }, Sel);
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + sRd + ", " + sRs1 + ", " + sRs2 + ", " + immName(Ls3);
      }
    }
    break;

  case 99: // Immediate Branching ops

    if (Funct3 == 2) {
      sOpcode = "P.BEQIMM";
    } else if (Funct3 == 3) {
      sOpcode = "P.BNEQIMM";
    }
    if (sOpcode.length) {
      //
      // B-type branch offset (13 bit, signed, bit 0 is always 0):
      //   encoding bit  31    -> offset bit  12 (sign)
      //   encoding bit  7     -> offset bit  11
      //   encoding bits 30:25 -> offset bits 10:5
      //   encoding bits 11:8  -> offset bits 4:1
      //
      var a = (Encoding >>  7) & 1;
      var b = (Encoding >>  8) & 0xF;
      var c = (Encoding >> 25) & 0x3F;
      var d = (Encoding >> 31) & 1;
      var Offset = (b << 1) | (c << 5) | (a << 11) | (d << 12);

      if (d) {
        Offset -= 0x2000; // sign-extend: backward branch
      }
      sSymbol = Debug.getSymbol(Addr + Offset);

      sInst = sOpcode + "\t" + sRs1 + ", " + Imm5 + ", " + Offset;
      if (sSymbol && sSymbol.length) { // append the branch target symbol as trailing comment, if known
        sInst = sInst + "\t; " + sSymbol;
      }
    }
    break;

  } // end switch(Opcode)

  if (sInst.length) {
    return sInst;
  }
  return undefined;
}
  +
  +
/*********************************************************************
*
*       getInstInfo
*
*  Function Description
*    Returns information about an instruction.
*
*    Used by Ozone to generate timeline stacks and call-graphs,
*    amongst other applications.
*
*    The instruction is classified by its major opcode (bits 6:0) only.
*    For immediate branches, the branch target address is computed
*    from the instruction address and the encoded offset.
*
*  Function Parameters
*    Addr   instruction address (type: integer)
*    aInst  instruction data bytes, lowest-order byte first (type: byte array)
*    Flags  basic information about the instruction required for analysis.
*           Not evaluated by this implementation.
*
*  Return Value
*    undefined:  if the input instruction is not supported by this plugin or an error occurred
*    InstInfo:   an object corresponding to the following C structure:
*
*    struct INST_INFO {
*      U32 Mode;                       // instruction execution mode (for ex. THUMB or ARM)
*      U32 Size;                       // instruction byte size
*      U64 AccessAddr;                 // branch address or memory access address
*      int StackAdjust;                // Difference of SP before and after instruction execution
*      struct {
*        U32 IsValid           : 1;    // all fields initialized
*        U32 IsControlTransfer : 1;    // Instruction possibly alters the PC (synchronously or asynchronously) (WFI, SVC, POP PC, LDR PC, ...)
*        U32 IsSoftIRQ         : 1;    // Instruction is a software interrupt request
*        U32 IsBranch          : 1;    // Instruction is a simple branch (B, JMP, ...)
*        U32 IsCall            : 1;    // Instruction is a function call (Branch with Link, BL, CALL, ...)
*        U32 IsReturn          : 1;    // Dedicated return instruction or return-style branch (e.g. POP PC)
*        U32 IsMemAccess       : 1;    // Instruction reads from or writes to memory
*        U32 IsFixedAddress    : 1;    // Branch or access address is fixed (absolute or PC-relative)
*        U32 IsBP              : 1;    // Instruction is a SW Breakpoint
*        U32 IsSemiHosting     : 1;    // Instruction could be a semihosting instruction (BKPT 0xAB or SVC 0xAB or SVC 0x123456)
*        U32 IsNOP             : 1;    // Instruction is a NOP
*        U32 IsConditional     : 1;    // Instruction is conditionally executed
*        U32 Condition         : 4;    // Condition if conditionally executed
*      } Flags;
*    }
*
*  Notes
*    (1) Flags is returned as a JavaScript object whose members carry the
*        names of the bit-fields above. Only the members that apply to the
*        instruction are set.
*        NOTE(review): assumes Ozone accepts this object representation of
*        Flags - confirm against the Ozone User Guide.
*/
function getInstInfo(Addr, aInst, Flags) {

  if (aInst.length != 4) {
    return undefined; // the RI5CY ISA extension has a fixed instruction length of 32 bit
  }
  var Encoding = (aInst[3] << 24) | (aInst[2] << 16) | (aInst[1] << 8) | aInst[0];
  var Opcode   = (Encoding >> 0) & 0x7F;

  var InstInfo = new Object();
  InstInfo.Size        = 4;
  InstInfo.Mode        = 0;
  InstInfo.AccessAddr  = 0;
  InstInfo.StackAdjust = 0;
  //
  // Flags must be an object: members assigned to a number primitive
  // are discarded, which would make every instruction appear invalid.
  //
  InstInfo.Flags       = new Object();

  switch (Opcode) {

  case 11:  // Loads with Post-Increment
  case 3:   // Register-Register Loads
  case 43:  // Register Stores with Post-Increment
  case 35:  // Register-Register Stores

    InstInfo.Flags.IsValid     = 1;
    InstInfo.Flags.IsMemAccess = 1;
    break;

  case 123: // Hardware Loops
  case 51:  // Bit Manipulation ops
  case 91:  // General ALU ops

    InstInfo.Flags.IsValid = 1;
    break;

  case 99:  // Immediate Branching ops
    //
    // B-type branch offset (13 bit, signed, bit 0 is always 0):
    //   encoding bit  31    -> offset bit  12 (sign)
    //   encoding bit  7     -> offset bit  11
    //   encoding bits 30:25 -> offset bits 10:5
    //   encoding bits 11:8  -> offset bits 4:1
    //
    var a = (Encoding >>  7) & 1;
    var b = (Encoding >>  8) & 0xF;
    var c = (Encoding >> 25) & 0x3F;
    var d = (Encoding >> 31) & 1;
    var Offset = (b << 1) | (c << 5) | (a << 11) | (d << 12);

    if (d) {
      Offset -= 0x2000; // sign-extend: backward branch
    }
    InstInfo.AccessAddr           = Addr + Offset;
    InstInfo.Flags.IsValid        = 1;
    InstInfo.Flags.IsBranch       = 1;
    InstInfo.Flags.IsFixedAddress = 1; // PC-relative target
    InstInfo.Flags.IsConditional  = 1;
    break;

  default:  // not an opcode handled by this plugin
    break;

  } // end switch(Opcode)

  if (InstInfo.Flags.IsValid) {
    return InstInfo;
  }
  return undefined;
}
  +
  +
/*********************************************************************
*
*       regName
*
*  Function Description
*    Helper function. Maps a RISC-V integer register index (0..31)
*    to its ABI name. Any other input yields "?".
*/
function regName(r) {
  //
  // ABI names, indexed by register number
  //
  var aName = [
    "zero", "ra", "sp",  "gp",  "tp", "t0", "t1", "t2",
    "fp",   "s1", "a0",  "a1",  "a2", "a3", "a4", "a5",
    "a6",   "a7", "s2",  "s3",  "s4", "s5", "s6", "s7",
    "s8",   "s9", "s10", "s11", "t3", "t4", "t5", "t6"
  ];
  var sReg;

  if (typeof r === "number") { // only genuine numbers select a register; strings etc. are rejected
    sReg = aName[r];
  }
  if (sReg === undefined) {    // out of range or not an integer index
    sReg = "?";
  }
  return sReg;
}
  +
  +
/*********************************************************************
*
*       immName
*
*  Function Description
*    Helper function. Returns an unsigned immediate value as string.
*
*    Values below 0x20 are printed in decimal. Larger values are printed
*    in hexadecimal with a leading "0x", so that the radix of the printed
*    number is unambiguous (e.g. 32 is printed as "0x20", not as "20").
*
*  Function Parameters
*    Imm  immediate value (type: integer)
*
*  Return Value
*    string representation of the immediate
*/
function immName(Imm) {

  var sImm;

  if (Imm < 0x20) {   // print small immediates in decimal
    sImm = Imm.toString(10);
  } else {            // print large immediates in hexadecimal
    sImm = "0x" + Imm.toString(16).toUpperCase();
  }
  return sImm;
}
  +
  +
/*************************** End of file ****************************/
  +
</syntaxhighlight>
   
 
== References ==
 
== References ==
Line 440: Line 1,041:
 
# Ozone User Guide
 
# Ozone User Guide
 
# RI5CY User Manual
 
# RI5CY User Manual
# J-Link User Guide
+
# J-Link / J-Trace User Guide

Revision as of 10:06, 13 December 2019


Introduction

With RISC-V and the advent of custom instructions in Armv8-A in mid 2019, all of Ozone's supported target architectures now allow MCU core vendors to add custom instructions to their design.

Considering this technological development, it became highly desirable to supply customers with a tool for extending Ozone's instruction set knowledge on a particular architecture.

Since version 2.71a, Ozone supports custom instructions via disassembly support plugins.

Disassembly Plugins

A disassembly plugin extends Ozone's disassembler by:

  1. providing the assembly code of custom instructions.
  2. providing numerical information about custom instructions, such as the PC branched to. Ozone broadly relies on numerical instruction information in multiple areas, such as its call graph window.

Disassembly plugins are written in JavaScript. All of JavaScript's basic language constructs are supported. Ozone imposes a single requirement on disassembly plugins, which is that all script code must be contained within functions.

Loading The Plugin

Command Project.SetDisassemblyPlugin is provided to load a disassembly plugin. When this command is placed into project file function OnProjectLoad, the plugin will be loaded each time the project is opened. The command has a single argument, which is the file path.

Users may alternatively execute action Set Script of the disassembly window context menu in order to load a disassembly plugin. When executed, this action will also edit the project file accordingly.

Script Functions Overview

A disassembly plugin consists of 3 predefined functions:

Function Description Executed When Optional
init Performs initialization tasks plugin load Yes
printInstAsm Returns the disassembly text of a custom (or overridden) instruction on-demand Yes
getInstInfo Returns numeric information about a custom (or overridden) instruction, such as the PC branched to program file load Yes

Next to the predefined script functions, users are free to add their own functions to disassembly plugins in order to structure the code.

Script Functions

This section demonstrates the use of each script function on the basis of an example implementation. The example implementation adds support for a custom instruction on a RI5CY RISC-V MCU core.

init

A disassembly plugin implementation typically starts with script function init. This function is called when the disassembly plugin is loaded. The main purpose of function init is to provide a place where instruction overrides using command Debug.enableOverrideInst can be defined. An instruction override allows users to alter the disassembly and numerical information of a known instruction.

/*********************************************************************
*
*       init
*
*  Function Description
*    Plugin entry point. Ozone calls this function once the script
*    has been loaded (i.e. after command "Project.SetDisassemblyPlugin"
*    was executed).
*
*    Registers the instructions whose default disassembly this plugin
*    replaces by issuing "Debug.enableOverrideInst" commands.
*
*  Return Value
*     0:  OK
*    -1:  error
*/
function init() {
  //
  // Mark the instruction "ADDI sp, sp, -16" (0x1141) as overridden by this plugin.
  // The encoding is given byte-wise, lowest-order byte first.
  //
  var aInst = [0x41, 0x11];
  var aMask = [0xFF, 0xFF]; // all encoding bits are relevant

  Debug.enableOverrideInst(aInst, aMask);

  return 0;
}

This example implementation of init overrides the instruction with integer encoding 0x1141.

printInstAsm

Next, we implement function printInstAsm in order to:

  • provide the disassembly of custom instruction "P.BEQIMM" with integer encoding 0x06362A63
  • provide the disassembly of overridden instruction 0x1141
/*********************************************************************
*
*       printInstAsm
*
*  Function Description
*    Prints the assembly code of an instruction.
*
*    Supported instructions:
*      - custom instruction "P.BEQIMM" (32 bit encoding)
*      - overridden instruction "ADDI sp, sp, -16" (16 bit encoding 0x1141)
*
*  Function Parameters
*    Addr   instruction address (type: U64)
*    aInst  instruction bytes, least significant byte first (type: byte array)
*    Flags  basic information about the instruction required for analysis. 
*           Interpretation depends on architecture (not used by this function).
*
*  Return Value
*    undefined:   if the input instruction is not supported by this plugin
*    string:      assembly code string containing a single tab after the instruction
*                 mnemonic and a single tab before a possible trailing comment
*
*  Notes
*    (1) getRegName() is a user-defined script function which returns
*        the name of a RISC-V register.
*/ 
function printInstAsm(Addr, aInst, Flags) {

  var Encoding;

  if (aInst.length == 4) {

    Encoding = (aInst[3] << 24) | (aInst[2] << 16) | 
               (aInst[1] << 8)  | aInst[0];

    if ((Encoding & 0x707F) == 0x2063) { // opcode == "P.BEQIMM" ?
      // 
      // "P.BEQIMM" is a PC-relative conditional branch
      //
      // Operation:
      //     If (Rs1 == Imm5) branch to Addr + Offset,
      //     where Offset is the sign-extended value Imm12[12:1] << 1.
      //
      // Encoding = {Imm12[12|10:5] | Imm5    | rs1     | funct3  | Imm12[4:1|11] | opcode}
      //            -------------------------------------------------------------------------
      //               [31:25]        [24:20]   [19:15]   [14:12]     [11:7]         [6:0]
      //            -------------------------------------------------------------------------
      //                  -             -         -        010          -          1100011
      //
      var a       =  (Encoding >> 7)  & 1;     // Imm12[11]
      var b       =  (Encoding >> 8)  & 0xF;   // Imm12[4:1]
      var c       =  (Encoding >> 25) & 0x3F;  // Imm12[10:5]
      var d       =  (Encoding >> 31) & 1;     // Imm12[12] (sign bit)
      var Imm5    =  (Encoding >> 20) & 0x1F;
      var Rs1     =  (Encoding >> 15) & 0x1F;
      //
      // Assemble the byte offset. Each field is placed at its own bit position;
      // bit 0 is always zero since branch targets are 2-byte aligned.
      //
      var Offset  =  (b << 1) | (c << 5) | (a << 11) | (d << 12);

      if (d) {
        Offset -= 0x2000; // sign-extend the 13 bit offset (backward branch)
      }
      var sInst   = "P.BEQIMM\t" + getRegName(Rs1) + ", " + Imm5 + ", " + Offset;
      //
      // Debug.getSymbol returns undefined when no symbol could be found.
      // Only append a comment when a non-empty symbol name was returned.
      //
      var sSymbol =  Debug.getSymbol(Addr + Offset);

      if (sSymbol) {
        return sInst + "\t; " + sSymbol;
      }
      return sInst;
    }

  } else if (aInst.length == 2) {

    Encoding = (aInst[1] << 8) | aInst[0];

    if (Encoding == 0x1141) { // "ADDI sp, sp, -16" ?
      return "ADDI\tsp, sp, -0x10";
    }
  }
  return undefined;
}

A typical implementation of printInstAsm will be largely based on integer arithmetic, as this example illustrates. The example executes a single debugger API command with Debug.getSymbol. This command returns the name of the symbol at or preceding the input address. The symbol name is appended as comment to the returned assembly code text. Function getRegName is a user-defined script function which returns the name of a RISC-V register.

The text returned by function printInstAsm must have the following format: <mnemonic>\t<operands>\t;<comment> for example: P.BEQIMM\ta2, 3, 116\t; OS_Idle

getInstInfo

We also want the disassembly plugin to provide numerical information about custom instruction "P.BEQIMM" to Ozone, such as the branch destination PC. This will allow Ozone to assemble and display correct information in areas that are based on numerical instruction information, such as the call-graph window.

The plugin delivers numerical instruction information to Ozone via script function getInstInfo.

/*********************************************************************
*
*       getInstInfo
*
*  Function Description
*    Returns numerical information about an instruction.
*
*    Used by Ozone to generate timeline stacks and call-graphs,
*    among other applications.
*
*    This example supports the custom instruction "P.BEQIMM" only.
*
*  Function Parameters
*    Addr   instruction address (type: U64)
*    aInst  instruction data bytes, least significant byte first (type: byte array)
*    Flags  basic information about the instruction required for analysis.
*           Interpretation depends on architecture (type: U32)
*
*  Return Value
*    undefined:  if the input instruction is not supported by this plugin
*    InstInfo:   a javascript object corresponding to the following C structure:
*          
*    struct INST_INFO {
*      U32 Mode;         // instruction execution mode (for ex. THUMB or ARM)
*      U32 Size;         // instruction byte size
*      U64 AccessAddr;   // branch address or memory access address
*      int StackAdjust;  // Difference of SP before and after instruction execution
*      U32 Flags;        // binary instruction information (bit mask)
*    }
*/
function getInstInfo(Addr, aInst, Flags) {
  //
  // Bit masks of the INST_INFO.Flags members set by this function.
  //
  // NOTE(review): the bit positions follow the declaration order of the
  // documented Flags bit field, assuming the first member (IsValid) is
  // the least significant bit - confirm against the Ozone user guide.
  // Under that layout the former literal 0x1110 set bits 4, 8 and 12,
  // which are not the IsBranch, IsFixedAddress and IsConditional bits.
  //
  var FLAG_IS_VALID         = 1 << 0;
  var FLAG_IS_BRANCH        = 1 << 3;
  var FLAG_IS_FIXED_ADDRESS = 1 << 7;
  var FLAG_IS_CONDITIONAL   = 1 << 11;

  if (aInst.length != 4) {
    return undefined; // "P.BEQIMM" has a 32 bit encoding
  }
  var Encoding = (aInst[3] << 24) | (aInst[2] << 16) | (aInst[1] << 8) | aInst[0];

  if ((Encoding & 0x707F) != 0x2063) { // opcode != "P.BEQIMM" ?
    return undefined;
  }
  // 
  // "P.BEQIMM" is a PC-relative conditional branch
  //
  // Operation:
  //     If (Rs1 == Imm5) branch to Addr + Offset,
  //     where Offset is the sign-extended value Imm12[12:1] << 1.
  //
  // Encoding = {Imm12[12|10:5] | Imm5    | rs1     | funct3  | Imm12[4:1|11] | opcode}
  //            -------------------------------------------------------------------------
  //               [31:25]        [24:20]   [19:15]   [14:12]     [11:7]         [6:0]
  //            -------------------------------------------------------------------------
  //                  -             -         -        010          -          1100011
  //
  var a  =  (Encoding >> 7)  & 1;     // Imm12[11]
  var b  =  (Encoding >> 8)  & 0xF;   // Imm12[4:1]
  var c  =  (Encoding >> 25) & 0x3F;  // Imm12[10:5]
  var d  =  (Encoding >> 31) & 1;     // Imm12[12] (sign bit)
  //
  // Assemble the byte offset. Each field is placed at its own bit position;
  // bit 0 is always zero since branch targets are 2-byte aligned.
  //
  var Offset = (b << 1) | (c << 5) | (a << 11) | (d << 12);

  if (d) {
    Offset -= 0x2000; // sign-extend the 13 bit offset (backward branch)
  }
  var InstInfo;

  InstInfo             = new Object();
  InstInfo.Size        = 4;
  InstInfo.Mode        = 0;
  InstInfo.StackAdjust = 0;
  InstInfo.AccessAddr  = Addr + Offset;
  InstInfo.Flags       = FLAG_IS_VALID | FLAG_IS_BRANCH | FLAG_IS_FIXED_ADDRESS | FLAG_IS_CONDITIONAL;

  return InstInfo;
}

As demonstrated in the example above, numerical instruction information is returned as a JavaScript object containing a predefined set of members. The 32 bit unsigned Flags member of this object has the following bit field layout:

// Bit field layout of the 32 bit unsigned Flags member of INST_INFO.
// The named members occupy 16 bits in total (12 single-bit flags plus the
// 4 bit Condition field); the remaining 16 bits are not assigned.
// NOTE(review): the numeric mask of each flag depends on the bit allocation
// order used by Ozone (presumably the first member is the least significant
// bit) - confirm before hard-coding masks in a plugin.
struct {  
  U32 IsValid        : 1; // all fields initialized                     
  U32 IsCtrlTransfer : 1; // Instruction possibly alters the PC
  U32 IsSoftIRQ      : 1; // Instruction is a software interrupt request
  U32 IsBranch       : 1; // Instruction is a simple branch (B, JMP, ...)
  U32 IsCall         : 1; // Instruction is a function call (Branch with Link, BL, CALL, ...)
  U32 IsReturn       : 1; // Dedicated return instruction or return-style branch (e.g. POP PC)
  U32 IsMemAccess    : 1; // Instruction reads from or writes to memory
  U32 IsFixedAddress : 1; // Branch or access address is fixed (absolute or PC-relative)
  U32 IsBP           : 1; // Instruction is a SW breakpoint
  U32 IsSemiHosting  : 1; // Instruction could be a semihosting instruction
  U32 IsNOP          : 1; // Instruction is a NOP
  U32 IsConditional  : 1; // Instruction is conditionally executed
  U32 Condition      : 4; // Condition if conditionally executed
} Flags;

This concludes the plugin example. We have seen that from a top-level perspective, a disassembly plugin consists of 3 predefined functions.

API Commands

This section summarizes Ozone JavaScript API commands which are relevant for the programming of disassembly plugins.

Debug.getSymbol

Returns the name of the symbol at or preceding the input address. Ozone only considers symbols of variable, constant, function and assembly label type for the return value. A typical use case for this command is to obtain the label of a branch instruction.

Debug.getSymbol(U64 Addr)

Return Value

  • symbol name on success
  • undefined when no symbol could be found

Debug.enableOverrideInst

Allows plugin developers to override Ozone's built-in disassembler. The command must be executed from script function init.

Debug.enableOverrideInst(aInst, aMask)

Parameter Description Type
aInst Instruction bytes byte array
aMask Instruction bits significant for matching. This argument must have the same byte size as argument aInst. The argument effectively allows users to override multiple instructions at once. This is commonly desirable when overriding all instructions of a particular type. byte array

Return Value

  • 0 on success
  • -1 on error

TargetInterface.peekBytes

Returns target memory data. An exemplary use case for this command is to retrieve the word at the load/store location of a custom instruction.

TargetInterface.peekBytes(Addr, Size)

Parameter Description Type
Addr Memory address U64
Size Byte size U32

Return Value

  • memory data (byte array) on success
  • undefined on error

The Flags Parameter

The 32 bit unsigned Flags parameter of script functions printInstAsm and getInstInfo provides basic instruction information required for disassembly and analysis. The interpretation of this parameter depends on the target architecture, as explained below.

Flags on ARM

Value Description
0 Address is contained within a (code-inline) data segment
1 Address is contained within an AArch32 thumb code segment
2 Address is contained within an AArch32 ARM code segment
3 Address is contained within an AArch64 code segment

Flags on RISC-V

The Flags parameter currently has no meaning on RISC-V.

Embedded Studio Compatibility

Ozone JavaScript plugins for disassembly support and RTOS awareness share a common JavaScript API. This API is described in Ozone user guide section JavaScript Classes and is fully compatible with Embedded Studio. This means that a JavaScript plugin can be written once and then used with both software products.

Plugin Example

Shown below is the complete implementation of a disassembly plugin which adds support for all custom instructions of the RI5CY MCU core, PULP platform.

/*********************************************************************
*               SEGGER MICROCONTROLLER GmbH                          *
*       Solutions for real time microcontroller applications         *
**********************************************************************
*                                                                    *
*       (c) 1995 - 2019  SEGGER Microcontroller GmbH                 *
*                                                                    *
*       www.segger.com     Support: support@segger.com               *
*                                                                    *
**********************************************************************
*                                                                    *
*       Please note:                                                 *
*                                                                    *
*       Knowledge of this file may under no circumstances            *
*       be used to write a similar product                           *
*                                                                    *
*       Thank you for your fairness !                                *
*                                                                    *
**********************************************************************

----------------------------------------------------------------------
File        : Disassembly_RI5CY.js
Purpose     : Ozone disassembly javascript plugin for the RI5CY core
Literature  : [1] RI5CY: User Manual
Revision    : 1.0
---------------------------END-OF-HEADER------------------------------
*/

/*********************************************************************
*
*       init
*
*  Function Description
*    Plugin load hook. Ozone calls this function once the script has
*    been loaded (i.e. when command "Project.SetDisassemblyPlugin"
*    was executed).
*
*    This is the place to execute "Debug.enableOverrideInst" commands.
*    This plugin registers no instruction overrides, hence the
*    function has nothing to set up.
*
*  Return Value
*     0:  OK (always returned by this implementation)
*    -1:  error (never returned by this implementation)
*/
function init() {
  var Result = 0; // no initialization work, nothing that can fail
  return Result;
}

/*********************************************************************
*
*       printInstAsm
*
*  Function Description
*    Prints the assembly code of a RI5CY custom instruction.
*
*    The instruction is classified by its major opcode (bits [6:0]) and
*    then by its funct3 / funct7 fields. Encodings which do not match a
*    supported instruction are rejected.
*
*  Function Parameters
*    Addr     instruction address (type: integer)
*    aInst    instruction data bytes, least significant byte first (type: byte array)
*    Flags    basic information about the instruction required for analysis
*             (not used by this function).
*
*  Return Value
*    undefined:   if the input instruction is not supported by this plugin or an error occurred
*    string:      assembly code string containing a single tab after the instruction mnemonic
*                 and a single tab before a possible trailing comment 
*
*  Notes
*    (1) regName() and immName() are helper functions of this plugin.
*/ 
function printInstAsm(Addr, aInst, Flags) {
  
  if (aInst.length != 4) {
    return undefined; // the RI5CY ISA-extension has a fixed instruction length of 32 bit
  }
  var Encoding = (aInst[3] << 24) | (aInst[2] << 16) | (aInst[1] << 8) | aInst[0];
  //
  // Instruction bitfields. Several names alias the same bits since the
  // meaning of a bit range depends on the instruction format.
  //
  var Opcode   = (Encoding >> 0)  & 0x7F;
  var Rd       = (Encoding >> 7)  & 0x1F;
  var Rs1      = (Encoding >> 15) & 0x1F;
  var Rs2      = (Encoding >> 20) & 0x1F;
  var Funct3   = (Encoding >> 12) & 0x7;
  var Funct7   = (Encoding >> 25) & 0x7F;
  var Imm5     = (Encoding >> 20) & 0x1F;
  var Imm7     = (Encoding >> 25) & 0x7F;
  var Imm12    = (Encoding >> 20) & 0xFFF;
  var Ls3      = (Encoding >> 25) & 0x1F;
  var Ls2      = (Encoding >> 20) & 0x1F;
  var F2       = (Encoding >> 30) & 0x3;
  var L        = (Encoding >> 7)  & 0x1;
 
  var sOpcode  = "";
  var sInst    = "";
  var sSymbol  = "";

  switch (Opcode) {

  case 11: // Loads with Post-Increment

    if (Funct3 == 7) { // Register-Register Loads with Post-Increment

      switch (Funct7) {
        case 0:   sOpcode = "P.LB";  break;
        case 8:   sOpcode = "P.LH";  break;
        case 16:  sOpcode = "P.LW";  break;
        case 32:  sOpcode = "P.LBU"; break;
        case 40:  sOpcode = "P.LHU"; break;
        default:  sOpcode = "";      break;
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs2) + "(" + regName(Rs1) + "!)";
      }

    } else { // Register-Immediate Loads with Post-Increment

      switch (Funct3) {
        case 0:  sOpcode = "P.LB";  break;
        case 1:  sOpcode = "P.LH";  break;
        case 2:  sOpcode = "P.LW";  break;
        case 4:  sOpcode = "P.LBU"; break;
        case 5:  sOpcode = "P.LHU"; break;
        default: sOpcode = "";      break;
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + immName(Imm12) + "(" + regName(Rs1) + "!)";
      }
    }
    break;
  
  case 3: // Register-Register Loads

    switch (Funct7) {
      case 0:   sOpcode = "P.LB";  break;
      case 8:   sOpcode = "P.LH";  break;
      case 16:  sOpcode = "P.LW";  break;
      case 32:  sOpcode = "P.LBU"; break;
      case 40:  sOpcode = "P.LHU"; break;
      default:  sOpcode = "";      break;
    }
    if (Funct3 != 7) {
      sOpcode = ""; // invalid encoding
    }
    if (sOpcode.length) {
      sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs2) + "(" + regName(Rs1) + ")";
    }
    break;

  case 43: // Stores with Post-Increment

    if (Funct3 <= 2) { // Register-Immediate Stores with Post-Increment

      switch (Funct3) {
        case 0:  sOpcode = "P.SB";  break;
        case 1:  sOpcode = "P.SH";  break;
        case 2:  sOpcode = "P.SW";  break;
        default: sOpcode = "";      break;
      }
      Imm12 = (Imm7 << 5) | Imm5; // store immediate is split across bits [31:25] and [24:20]

      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rs2) + ", " + immName(Imm12) + "(" + regName(Rs1) + "!)";
      }
    } else { // Register-Register Stores with Post-Increment

      switch (Funct3) {
        case 4:  sOpcode = "P.SB";  break;
        case 5:  sOpcode = "P.SH";  break;
        case 6:  sOpcode = "P.SW";  break;
        default: sOpcode = "";      break;
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rs2) + ", " + regName(Rd) + "(" + regName(Rs1) + "!)";
      }
    }
    break;

  case 35: // Register-Register Stores

    switch (Funct3) {
      case 4:  sOpcode = "P.SB";  break;
      case 5:  sOpcode = "P.SH";  break;
      case 6:  sOpcode = "P.SW";  break;
      default: sOpcode = "";      break;
    }
    if (Funct7 != 0) {
      sOpcode = ""; // invalid encoding
    }
    if (sOpcode.length) {
      sInst = sOpcode + "\t" + regName(Rs2) + ", " + regName(Rd) + "(" + regName(Rs1) + ")";
    }
    break;

  case 123: // Hardware Loops

    switch (Funct3) {
      case 0:  sInst = "LP.STARTI" + "\t" + L + ", " + immName(Imm12); break;
      case 1:  sInst = "LP.ENDI"   + "\t" + L + ", " + immName(Imm12); break;  
      case 2:  sInst = "LP.COUNT"  + "\t" + L + ", " + regName(Rs1);   break; 
      case 3:  sInst = "LP.COUNTI" + "\t" + L + ", " + immName(Imm12); break;
      case 4:  sInst = "LP.SETUP"  + "\t" + L + ", " + regName(Rs1)  + ", " + immName(Imm12); break; 
      case 5:  sInst = "LP.SETUPI" + "\t" + L + ", " + immName(Imm5) + ", " + immName(Rs1);   break;
      default: sInst = ""; break;   
    }
    if (((Encoding >> 8) & 0xF) != 0) {
      sInst = ""; // invalid encoding
    }
    break;

  case 51: //  Bit Manipulation ops

    if (F2 == 3) { // Bit Manipulation, one register and two immediates

      switch (Funct3) {
        case 0:  sOpcode = "P.EXTRACT";  break;
        case 1:  sOpcode = "P.EXTRACTU"; break;
        case 2:  sOpcode = "P.INSERT";   break;
        case 3:  sOpcode = "P.BCLR";     break;
        case 4:  sOpcode = "P.BSET";     break;
        default: sOpcode = "";           break;
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + immName(Ls3) + ", " + immName(Ls2);
      }

    } else if (Funct7 == 64) { // Bit Manipulation, 2 registers

      switch (Funct3) {
        case 0:  sOpcode = "P.EXTRACTR";  break;
        case 1:  sOpcode = "P.EXTRACTUR"; break;
        case 2:  sOpcode = "P.INSERTR";   break;
        case 3:  sOpcode = "P.BCLRR";     break;
        case 4:  sOpcode = "P.BSETR";     break;
        default: sOpcode = "";            break;
      }
      if (Ls3 != 0) {
         sOpcode = ""; // invalid encoding
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2);
      }
    
    } else if (Funct7 == 4) { // Bit Manipulation, ROR
      
      if (Funct3 == 5) {
        sInst = "P.ROR" + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2);
      }

    } else if (Funct7 == 8) { // Bit Manipulation, two registers & General ALU
    
      switch (Funct3) {
        case 0:  sOpcode = "P.FF1";    break; // Bit Manipulation
        case 1:  sOpcode = "P.FL1";    break;
        case 2:  sOpcode = "P.CLB";    break;
        case 3:  sOpcode = "P.CNT";    break;
        case 4:  sOpcode = "P.EXTHS";  break; // General ALU 
        case 5:  sOpcode = "P.EXTHZ";  break;
        case 6:  sOpcode = "P.EXTBS";  break;
        case 7:  sOpcode = "P.EXTBZ";  break;
        default: sOpcode = "";         break;
      }
      if (Rs2 != 0) {
        sOpcode = ""; // invalid encoding
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs1);
      }

    } else if ((Funct7 == 2) && (Funct3 == 0)) { // General ALU ops, ABS

      if (Rs2 == 0) {
        sInst = "P.ABS" + "\t" + regName(Rd) + ", " + regName(Rs1);   
      }

    } else if (Funct7 == 2) { // General ALU ops, two registers

      switch (Funct3) {
        case 2:  sOpcode = "P.SLET";  break;
        case 3:  sOpcode = "P.SLETU"; break;
        case 4:  sOpcode = "P.MIN";   break;
        case 5:  sOpcode = "P.MINU";  break;
        case 6:  sOpcode = "P.MAX";   break;
        case 7:  sOpcode = "P.MAXU";  break;
        default: sOpcode = "";        break;
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2);
      }
    } else if (Funct7 == 10) { // General ALU ops, 1 register and 1 immediate

      switch (Funct3) {
        case 1:  sInst = "P.CLIP"   + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + immName(Ls2); break;
        case 2:  sInst = "P.CLIPU"  + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + immName(Ls2); break;
        case 3:  sInst = "P.CLIPR"  + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2); break; // register variant of P.CLIP
        case 4:  sInst = "P.CLIPUR" + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2); break;
        default: sInst = ""; break;
      }
    } else if (Funct7 == 33) { // Multiply Accumulate ops

      if (Funct3 == 0) {
        sInst = "P.MAC" + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2);
      } else if (Funct3 == 1) {
        sInst = "P.MSU" + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2);
      }
    }
    break;

  case 91: // General ALU ops
   
    if (((F2 == 0) || (F2 == 2)) && ((Funct3 == 2) || (Funct3 == 3) || (Funct3 == 6) || (Funct3 == 7))) { // General ALU ops, two registers and 1 immediate

      switch ((F2 << 3) | Funct3) {
        case 2:  sOpcode = "P.ADDN";    break;
        case 18: sOpcode = "P.ADDUN";   break;
        case 6:  sOpcode = "P.ADDRN";   break;
        case 22: sOpcode = "P.ADDURN";  break;
        case 3:  sOpcode = "P.SUBN";    break;
        case 19: sOpcode = "P.SUBUN";   break;
        case 7:  sOpcode = "P.SUBRN";   break;
        case 23: sOpcode = "P.SUBURN";  break;
        default: sOpcode = "";          break;
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2) + ", " + immName(Ls3);
      }
    } else if (((F2 == 1) || (F2 == 3)) && ((Funct3 == 2) || (Funct3 == 3) || (Funct3 == 6) || (Funct3 == 7))) { // General ALU ops, two registers
      
      switch ((F2 << 3) | Funct3) {
        case 10: sOpcode = "P.ADDNR";   break;
        case 26: sOpcode = "P.ADDUNR";  break;
        case 14: sOpcode = "P.ADDRNR";  break;
        case 30: sOpcode = "P.ADDURNR"; break;
        case 11: sOpcode = "P.SUBNR";   break;
        case 27: sOpcode = "P.SUBUNR";  break;
        case 15: sOpcode = "P.SUBRNR";  break;
        case 31: sOpcode = "P.SUBURNR"; break;
        default: sOpcode = "";          break;
      }
      if (Ls3 != 0) {
        sOpcode = ""; // invalid encoding
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2);
      }
    } else if ((Ls3 == 0) && (Funct3 == 0)) { // Multiply Accumulate, 2 registers

      switch (F2) {
        case 0:  sOpcode = "P.MULU";    break;
        case 1:  sOpcode = "P.MULHHU";  break;
        case 2:  sOpcode = "P.MULS";    break;
        case 3:  sOpcode = "P.MULHHS";  break;
        default: sOpcode = "";          break;
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2);
      }

    } else { // Multiply Accumulate, 2 registers and 1 immediate

      switch ((F2 << 3) | Funct3) {
        case 16: sOpcode = "P.MULSN";     break;
        case 24: sOpcode = "P.MULHHSN";   break;
        case 20: sOpcode = "P.MULSRN";    break;
        case 28: sOpcode = "P.MULHHSRN";  break;
        case 0:  sOpcode = "P.MULUN";     break;
        case 8:  sOpcode = "P.MULHHUN";   break; 
        case 4:  sOpcode = "P.MULURN";    break; 
        case 12: sOpcode = "P.MULHHURN";  break;
        case 17: sOpcode = "P.MACSN";     break;
        case 25: sOpcode = "P.MACHHSN";   break; 
        case 21: sOpcode = "P.MACSRN";    break;
        case 29: sOpcode = "P.MACHHSRN";  break;
        case 1:  sOpcode = "P.MACUN";     break; 
        case 9:  sOpcode = "P.MACHHUN";   break;
        case 5:  sOpcode = "P.MACURN";    break; 
        case 13: sOpcode = "P.MACHHURN";  break;
        default: sOpcode = "";            break;
      }
      if (sOpcode.length) {
        sInst = sOpcode + "\t" + regName(Rd) + ", " + regName(Rs1) + ", " + regName(Rs2) + ", " + immName(Ls3);
      }
    }
    break;

  case 99: // Immediate Branching ops

    if (Funct3 == 2) {
      sOpcode = "P.BEQIMM";
    } else if (Funct3 == 3) {
      sOpcode = "P.BNEQIMM";
    } else {
      sOpcode = ""; // not a RI5CY immediate branch
    }
    if (sOpcode.length) {
      //
      // The branch offset is encoded as Imm12[12|10:5] in bits [31:25]
      // and Imm12[4:1|11] in bits [11:7]. Bit 0 of the offset is always
      // zero and bit 12 is the sign bit.
      //
      var a      =  (Encoding >> 7)  & 1;     // Imm12[11]
      var b      =  (Encoding >> 8)  & 0xF;   // Imm12[4:1]
      var c      =  (Encoding >> 25) & 0x3F;  // Imm12[10:5]
      var d      =  (Encoding >> 31) & 1;     // Imm12[12] (sign bit)
      var Offset =  (b << 1) | (c << 5) | (a << 11) | (d << 12);

      if (d) {
        Offset -= 0x2000; // sign-extend the 13 bit offset (backward branch)
      }
      sInst = sOpcode + "\t" + regName(Rs1) + ", " + Imm5 + ", " + Offset;
      //
      // Debug.getSymbol returns undefined when no symbol could be found,
      // so the result must not be dereferenced unconditionally.
      //
      sSymbol = Debug.getSymbol(Addr + Offset);

      if (sSymbol) {
        sInst = sInst + "\t; " + sSymbol;
      }
    }
    break;

  } // end switch(Opcode)

  if (sInst.length) {
    return sInst;
  } else {
    return undefined;
  }
}

/*********************************************************************
*
*       getInstInfo
*
*  Function Description
*    Returns information about an instruction.
*
*    Used by Ozone to generate timeline stacks and call-graphs,
*    amongst other applications.
*
*    Instructions are classified by their major opcode (bits [6:0]) only.
*
*  Function Parameters
*    Addr     instruction address (type: integer)
*    aInst    instruction data bytes, least significant byte first (type: byte array)
*    Flags    basic information about the instruction required for analysis
*             (not used by this function).
*
*  Return Value
*    undefined:   if the input instruction is not supported by this plugin or an error occurred
*    InstInfo:    an object corresponding to the following C structure:
*          
*    struct INST_INFO {
*      U32 Mode;                      // instruction execution mode (for ex. THUMB or ARM)
*      U32 Size;                      // instruction byte size
*      U64 AccessAddr;                // branch address or memory access address
*      int StackAdjust;               // Difference of SP before and after instruction execution
*      struct {  
*        U32 IsValid           : 1;   // all fields initialized                     
*        U32 IsControlTransfer : 1;   // Instruction possibly alters the PC (synchronously or asynchronously) (WFI, SVC, POP PC, LDR PC, ...)
*        U32 IsSoftIRQ         : 1;   // Instruction is a software interrupt request
*        U32 IsBranch          : 1;   // Instruction is a simple branch (B, JMP, ...)
*        U32 IsCall            : 1;   // Instruction is a function call (Branch with Link, BL, CALL, ...)
*        U32 IsReturn          : 1;   // Dedicated return instruction or return-style branch (e.g. POP PC)
*        U32 IsMemAccess       : 1;   // Instruction reads from or writes to memory
*        U32 IsFixedAddress    : 1;   // Branch or access address is fixed (absolute or PC-relative)
*        U32 IsBP              : 1;   // Instruction is a SW Breakpoint
*        U32 IsSemiHosting     : 1;   // Instruction could be a semihosting instruction (BKPT 0xAB or SVC 0xAB or SVC 0x123456)
*        U32 IsNOP             : 1;   // Instruction is a NOP
*        U32 IsConditional     : 1;   // Instruction is conditionally executed
*        U32 Condition         : 4;   // Condition if conditionally executed
*      } Flags;
*    }
*
*  Notes
*    (1) In JavaScript, InstInfo.Flags is a plain 32 bit unsigned number.
*        Its bits must be set with bit masks; assigning named properties
*        to a number (InstInfo.Flags.IsValid = 1) has no effect.
*/
function getInstInfo(Addr, aInst, Flags) {
  //
  // Bit masks of the INST_INFO.Flags members used by this plugin.
  //
  // NOTE(review): the bit positions follow the declaration order of the
  // Flags bit field, assuming the first member (IsValid) is the least
  // significant bit - confirm against the Ozone user guide.
  //
  var FLAG_IS_VALID         = 1 << 0;
  var FLAG_IS_BRANCH        = 1 << 3;
  var FLAG_IS_MEM_ACCESS    = 1 << 6;
  var FLAG_IS_FIXED_ADDRESS = 1 << 7;
  var FLAG_IS_CONDITIONAL   = 1 << 11;

  if (aInst.length != 4) {
    return undefined; // the RI5CY ISA extension has a fixed instruction length of 32 bit
  }
  var Encoding  =  (aInst[3] << 24) | (aInst[2] << 16) | (aInst[1] << 8) | aInst[0];
  var Opcode    =  (Encoding >> 0) & 0x7F;

  var InstInfo         = new Object();
  InstInfo.Size        = 4;
  InstInfo.Mode        = 0;
  InstInfo.AccessAddr  = 0;
  InstInfo.StackAdjust = 0;
  InstInfo.Flags       = 0;

  switch (Opcode) {

  case 11:  // Loads with Post-Increment
  case 3:   // Register-Register Loads
  case 43:  // Register Stores with Post-Increment 
  case 35:  // Register-Register Stores

    InstInfo.Flags = FLAG_IS_VALID | FLAG_IS_MEM_ACCESS;
    break;

  case 123: // Hardware Loops
  case 51:  // Bit Manipulation ops
  case 91:  // General ALU ops

    InstInfo.Flags = FLAG_IS_VALID;
    break;

  case 99:  // Immediate Branching ops
    //
    // The branch offset is encoded as Imm12[12|10:5] in bits [31:25]
    // and Imm12[4:1|11] in bits [11:7]. Bit 0 of the offset is always
    // zero and bit 12 is the sign bit.
    //
    var a      =  (Encoding >> 7)  & 1;     // Imm12[11]
    var b      =  (Encoding >> 8)  & 0xF;   // Imm12[4:1]
    var c      =  (Encoding >> 25) & 0x3F;  // Imm12[10:5]
    var d      =  (Encoding >> 31) & 1;     // Imm12[12] (sign bit)
    var Offset =  (b << 1) | (c << 5) | (a << 11) | (d << 12);

    if (d) {
      Offset -= 0x2000; // sign-extend the 13 bit offset (backward branch)
    }
    InstInfo.AccessAddr = Addr + Offset;
    InstInfo.Flags      = FLAG_IS_VALID | FLAG_IS_BRANCH | FLAG_IS_FIXED_ADDRESS | FLAG_IS_CONDITIONAL;
    break;

  } // end switch(Opcode)

  if (InstInfo.Flags & FLAG_IS_VALID) {
    return InstInfo;
  } else {
    return undefined;
  }
}

/*********************************************************************
*
*       regName
*
*  Function Description
*    Helper function. Maps a RISC-V integer register index (0..31)
*    to the register's ABI name.
*
*  Function Parameters
*    r    register index (type: integer)
*
*  Return Value
*    ABI register name, or "?" when r is not a register index
*/
function regName(r) {
  //
  // ABI names, indexed by register number x0..x31
  //
  var aName = [
    "zero", "ra", "sp",  "gp",  "tp", "t0", "t1", "t2",
    "fp",   "s1", "a0",  "a1",  "a2", "a3", "a4", "a5",
    "a6",   "a7", "s2",  "s3",  "s4", "s5", "s6", "s7",
    "s8",   "s9", "s10", "s11", "t3", "t4", "t5", "t6"
  ];
  //
  // Only plain numbers are accepted, so that values of other types
  // (e.g. the string "5") are rejected instead of being used as array key.
  //
  if (typeof r === "number") {
    var sReg = aName[r];
    if (sReg !== undefined) {
      return sReg;
    }
  }
  return "?";
}

/*********************************************************************
*
*       immName
*
*  Function Description
*    Helper function. Returns an unsigned immediate value as string.
*
*    Values below 0x20 are printed in decimal. Larger values are
*    printed in hexadecimal with a "0x" prefix, so that the radix of
*    the printed value is unambiguous (without the prefix, the values
*    20 and 0x20 would both be printed as "20").
*
*  Function Parameters
*    Imm    immediate value (type: integer)
*
*  Return Value
*    the immediate value as string
*/
function immName(Imm) {

  var sImm;

  if (Imm < 0x20) { // print small immediates in decimal
    sImm = Imm.toString(10);
  } else { // print large immediates in hexadecimal
    sImm = "0x" + Imm.toString(16);
  }
  return sImm;
}

/*************************** End of file ****************************/

References

  1. Ozone User Guide
  2. RI5CY User Manual
  3. J-Link / J-Trace User Guide