Optimization using prefetch - optimization

I want to understand how to use PREFETCH* instructions.
For this I wrote some code:
.model flat
.code
; void fast_mem_copy_sse(int *dst, int *src, int n)        (32-bit cdecl)
;
; Copies n ints from src to dst with aligned SSE moves, 32 ints (128 bytes)
; per loop iteration.
;   dst, src : must be 16-byte aligned (MOVAPS faults on unaligned operands).
;   n        : element count in ints, NOT bytes -- ecx drops by 32 for every
;              128 bytes moved. A remainder of n mod 32 ints is not copied.
;
; Fixes relative to the previous version:
;   * data flows src -> dst (it used to load from the first argument and
;     store through the second, i.e. it copied dst -> src);
;   * esi/edi are saved and restored -- they are callee-saved under cdecl;
;   * n < 32 (including n <= 0) returns at once instead of looping until
;     ecx happens to hit zero.
;
; NOTE(review): the decorated name contains '#' where MSVC decoration would
; have '@' (?fast_mem_copy_sse@@YAXPAH0H@Z) -- confirm before assembling.
?fast_mem_copy_sse##YAXPAH0H#Z PROC
    PUSH esi
    PUSH edi
    MOV edi, [esp + 12] ; destination (1st argument; +8 for the two pushes)
    MOV esi, [esp + 16] ; source      (2nd argument)
    MOV ecx, [esp + 20] ; n = number of ints to copy
    CMP ecx, 4*8
    JL copy_done_1      ; fewer than one full 32-int chunk: nothing to do
copy_loop_1:
    ; load 8 x 16 bytes from the source ...
    MOVAPS xmm0, [esi + 0 * 4 * 4]
    MOVAPS xmm1, [esi + 1 * 4 * 4]
    MOVAPS xmm2, [esi + 2 * 4 * 4]
    MOVAPS xmm3, [esi + 3 * 4 * 4]
    MOVAPS xmm4, [esi + 4 * 4 * 4]
    MOVAPS xmm5, [esi + 5 * 4 * 4]
    MOVAPS xmm6, [esi + 6 * 4 * 4]
    MOVAPS xmm7, [esi + 7 * 4 * 4]
    ; ... and store them to the destination
    MOVAPS [edi + 0 * 4 * 4], xmm0
    MOVAPS [edi + 1 * 4 * 4], xmm1
    MOVAPS [edi + 2 * 4 * 4], xmm2
    MOVAPS [edi + 3 * 4 * 4], xmm3
    MOVAPS [edi + 4 * 4 * 4], xmm4
    MOVAPS [edi + 5 * 4 * 4], xmm5
    MOVAPS [edi + 6 * 4 * 4], xmm6
    MOVAPS [edi + 7 * 4 * 4], xmm7
    ADD esi, 4*4*8      ; advance both pointers by 128 bytes
    ADD edi, 4*4*8
    SUB ecx, 4*8        ; 32 ints consumed
    CMP ecx, 4*8
    JGE copy_loop_1     ; continue while a full chunk remains
copy_done_1:
    POP edi
    POP esi
    RET
?fast_mem_copy_sse##YAXPAH0H#Z ENDP
; void fast_mem_copy_sse_movntdq(int *dst, int *src, int n)   (32-bit cdecl)
;
; Same contract as the plain SSE copy, but the stores use MOVNTDQ
; (non-temporal, write-combining) so the destination does not displace
; cached data.
;   dst, src : must be 16-byte aligned (MOVAPS / MOVNTDQ fault otherwise).
;   n        : element count in ints, NOT bytes; 32 ints (128 bytes) are
;              moved per iteration and a remainder of n mod 32 is not copied.
;
; Fixes relative to the previous version:
;   * data flows src -> dst (loads used the first argument, stores the
;     second, so the copy ran in the wrong direction);
;   * esi/edi are preserved (callee-saved under cdecl);
;   * n < 32 returns immediately instead of overrunning the buffers;
;   * SFENCE after the loop orders the weakly-ordered non-temporal stores
;     before anything the caller does next.
?fast_mem_copy_sse_movntdq##YAXPAH0H#Z PROC
    PUSH esi
    PUSH edi
    MOV edi, [esp + 12] ; destination (1st argument; +8 for the two pushes)
    MOV esi, [esp + 16] ; source      (2nd argument)
    MOV ecx, [esp + 20] ; n = number of ints to copy
    CMP ecx, 4*8
    JL copy_done_2      ; fewer than one full 32-int chunk: nothing to do
copy_loop_2:
    MOVAPS xmm0, [esi + 0 * 4 * 4]
    MOVAPS xmm1, [esi + 1 * 4 * 4]
    MOVAPS xmm2, [esi + 2 * 4 * 4]
    MOVAPS xmm3, [esi + 3 * 4 * 4]
    MOVAPS xmm4, [esi + 4 * 4 * 4]
    MOVAPS xmm5, [esi + 5 * 4 * 4]
    MOVAPS xmm6, [esi + 6 * 4 * 4]
    MOVAPS xmm7, [esi + 7 * 4 * 4]
    MOVNTDQ [edi + 0 * 4 * 4], xmm0
    MOVNTDQ [edi + 1 * 4 * 4], xmm1
    MOVNTDQ [edi + 2 * 4 * 4], xmm2
    MOVNTDQ [edi + 3 * 4 * 4], xmm3
    MOVNTDQ [edi + 4 * 4 * 4], xmm4
    MOVNTDQ [edi + 5 * 4 * 4], xmm5
    MOVNTDQ [edi + 6 * 4 * 4], xmm6
    MOVNTDQ [edi + 7 * 4 * 4], xmm7
    ADD esi, 4*4*8      ; advance both pointers by 128 bytes
    ADD edi, 4*4*8
    SUB ecx, 4*8        ; 32 ints consumed
    CMP ecx, 4*8
    JGE copy_loop_2     ; continue while a full chunk remains
    SFENCE              ; make the non-temporal stores globally visible
copy_done_2:
    POP edi
    POP esi
    RET
?fast_mem_copy_sse_movntdq##YAXPAH0H#Z ENDP
; void fast_mem_copy_sse_prefetch(int *dst, int *src, int n)  (32-bit cdecl)
;
; Aligned SSE copy with software prefetch of the SOURCE stream.
;   dst, src : must be 16-byte aligned (MOVAPS faults otherwise).
;   n        : element count in ints, NOT bytes; 32 ints (128 bytes) are
;              moved per iteration and a remainder of n mod 32 is not copied.
;
; Fixes relative to the previous version:
;   * data flows src -> dst (the copy direction was reversed);
;   * esi/edi are preserved (callee-saved under cdecl);
;   * n < 32 returns immediately instead of overrunning the buffers;
;   * the prefetch now targets data several iterations AHEAD. The old
;     "PREFETCHT0 [edi]" asked for the very line the next instruction
;     loads, which cannot hide any latency.
;
; The distance below is 8 iterations (1024 bytes); tune it by measurement.
; Two prefetches are issued per iteration on the assumption of 64-byte
; cache lines (128 bytes are consumed per iteration) -- TODO confirm for the
; target CPU. PREFETCHh is only a hint and does not fault, so running past
; the end of the source buffer is harmless.
?fast_mem_copy_sse_prefetch##YAXPAH0H#Z PROC
    PUSH esi
    PUSH edi
    MOV edi, [esp + 12] ; destination (1st argument; +8 for the two pushes)
    MOV esi, [esp + 16] ; source      (2nd argument)
    MOV ecx, [esp + 20] ; n = number of ints to copy
    CMP ecx, 4*8
    JL copy_done_3      ; fewer than one full 32-int chunk: nothing to do
copy_loop_3:
    PREFETCHT0 [esi + 8 * 4*4*8]        ; first line, 8 iterations ahead
    PREFETCHT0 [esi + 8 * 4*4*8 + 64]   ; second line of that 128-byte chunk
    MOVAPS xmm0, [esi + 0 * 4 * 4]
    MOVAPS xmm1, [esi + 1 * 4 * 4]
    MOVAPS xmm2, [esi + 2 * 4 * 4]
    MOVAPS xmm3, [esi + 3 * 4 * 4]
    MOVAPS xmm4, [esi + 4 * 4 * 4]
    MOVAPS xmm5, [esi + 5 * 4 * 4]
    MOVAPS xmm6, [esi + 6 * 4 * 4]
    MOVAPS xmm7, [esi + 7 * 4 * 4]
    MOVAPS [edi + 0 * 4 * 4], xmm0
    MOVAPS [edi + 1 * 4 * 4], xmm1
    MOVAPS [edi + 2 * 4 * 4], xmm2
    MOVAPS [edi + 3 * 4 * 4], xmm3
    MOVAPS [edi + 4 * 4 * 4], xmm4
    MOVAPS [edi + 5 * 4 * 4], xmm5
    MOVAPS [edi + 6 * 4 * 4], xmm6
    MOVAPS [edi + 7 * 4 * 4], xmm7
    ADD esi, 4*4*8      ; advance both pointers by 128 bytes
    ADD edi, 4*4*8
    SUB ecx, 4*8        ; 32 ints consumed
    CMP ecx, 4*8
    JGE copy_loop_3     ; continue while a full chunk remains
copy_done_3:
    POP edi
    POP esi
    RET
?fast_mem_copy_sse_prefetch##YAXPAH0H#Z ENDP
END
#include <string.h>
#include <iostream>
#include <time.h>
// Uncomment to verify every copy element-by-element inside the timed loops.
//#define CHECK
// Benchmark dimensions. The expansions are parenthesized so the macros stay
// correct inside larger expressions (e.g. `x / AMOUNT_OF_BLOCKS`,
// `x % BLOCK_SIZE`), which the bare `8*8` / `200*4` forms were not.
#define BLOCK_SIZE (8*8)            // ints per block
#define AMOUNT_OF_BLOCKS (200*4)    // blocks per buffer
#define AMOUNT_OF_RUNS 100000       // timed repetitions per routine
// Copy routines implemented in assembly; n is a count of ints and both
// pointers must be 16-byte aligned.
void fast_mem_copy_sse(int *dst, int *src, int n);
void fast_mem_copy_sse_movntdq(int *dst, int *src, int n);
void fast_mem_copy_sse_prefetch(int *dst, int *src, int n);
// Baseline scalar copy: transfers n ints from src to dst, front to back.
// Does nothing when n <= 0.
void fast_mem_copy(int *dst, int *src, int n)
{
    for (int remaining = n; remaining > 0; --remaining) {
        *dst++ = *src++;
    }
}
// Signature shared by every copy routine under test.
typedef void (*copy_fn_t)(int *dst, int *src, int n);

// Times AMOUNT_OF_RUNS repetitions of "fill src, copy src -> dst" for one
// routine and prints the elapsed clock() ticks and seconds.
//   Copy : routine under test. It is a template argument so the compiler
//          can still inline it, as it could in the hand-unrolled version.
//   name : label used in the report (and in CHECK diagnostics).
//   dst, src : buffers of BLOCK_SIZE * AMOUNT_OF_BLOCKS ints each.
// The memset is deliberately inside the timed region, as before, so the
// numbers stay comparable with earlier runs.
template <copy_fn_t Copy>
static void run_benchmark(const char *name, int *dst, int *src)
{
    const int count = BLOCK_SIZE * AMOUNT_OF_BLOCKS;
    const clock_t start = clock();
    for (int i = 0; i < AMOUNT_OF_RUNS; i++) {
        memset(src, i, count * sizeof(int));
        Copy(dst, src, count);
#ifdef CHECK
        for (int j = 0; j < count; j++) {
            if (src[j] != dst[j]) {
                std::cout << name << " work wrong; j = " << j << "\n";
            }
        }
#endif
    }
    const clock_t elapsed = clock() - start;
    // All four reports now share one format; the first three used to print
    // "<t>clicks (<s>seconds)" with the spaces missing.
    std::cout << name << " took me " << elapsed << " clicks ("
              << ((float)elapsed / CLOCKS_PER_SEC) << " seconds).\n";
}

// Benchmarks the scalar copy against the three assembly variants.
int main()
{
    // 16-byte alignment is required by the MOVAPS/MOVNTDQ based routines.
    _declspec(align(16)) int a[AMOUNT_OF_BLOCKS*BLOCK_SIZE];
    _declspec(align(16)) int b[AMOUNT_OF_BLOCKS*BLOCK_SIZE];

    run_benchmark<fast_mem_copy>("fast_mem_copy", b, a);
    run_benchmark<fast_mem_copy_sse>("fast_mem_copy_sse", b, a);
    run_benchmark<fast_mem_copy_sse_movntdq>("fast_mem_copy_sse_movntdq", b, a);
    run_benchmark<fast_mem_copy_sse_prefetch>("fast_mem_copy_sse_prefetch", b, a);

    system("PAUSE");
    return 0;
}
I got the following result:
fast_mem_copy took me 11262 clicks (11.262 seconds).
fast_mem_copy_sse took me 1940 clicks (1.94 seconds).
fast_mem_copy_sse_movntdq took me 3570 clicks (3.57 seconds).
fast_mem_copy_sse_prefetch took me 1970 clicks (1.97 seconds).
So what is wrong?
Or is the hardware prefetcher already doing the work in fast_mem_copy_sse, so that there is no point in issuing explicit prefetch instructions?
I also profiled the code with VTune, and it reported that there are no cache misses.

Prefetching will only help if you do it far enough ahead to matter. I believe CPU speeds are up to the point that it now takes about 200 CPU cycles to fetch from RAM. With a loop like yours you'd need to be prefetching probably 10 iterations ahead.
Also, if you are doing simple copy loops that proceed in sequential access, the CPU hardware is already doing prefetch for you.

Related

Combine 2 different sized arrays element-wise based on index pairing array

Say, we had 2 arrays of unique values:
a = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) # any values are possible,
b = np.array([0, 11, 12, 13, 14, 15, 16, 17, 18, 19]) # sorted values are for demonstration
, where elements are paired by position: a[0] = 0 corresponds to b[0] = 0, a[1] = 1 to b[1] = 11, a[2] = 2 to b[2] = 12, etc.
Then, due to some circumstances we randomly lost some of it and received noise elements from/to both a & b. Now 'useful data' in a and b are kind of 'eroded' like this:
a = np.array([0, 1, 313, 2, 3, 4, 5, 934, 6, 8, 9, 730, 241, 521])
b = np.array([112, 514, 11, 13, 16, 955, 17, 18, 112])
The noise elements have a negligible probability of coinciding with any of the 'useful data'. So, by searching for the original values, we can find the ones that survived and define the 'index pairing array':
cor_tab = np.array([[1,2], [4,3], [8,4], [9,7]])
which, if applied, provides pairs of 'useful data' left:
np.column_stack((a[cor_tab[:,0]], b[cor_tab[:,1]]))
array([[1, 11],
[3, 13],
[6, 16],
[8, 18]])
The question: Given the 'eroded' a and b, how to combine them into numpy array such that:
values indexed in cor_tab are paired in the same column/row,
lost values are treated as -1,
noise as 'don't care', and
array looks like this:
[[ -1 112],
[ 0 514],
[ 1 11],
[313 -1],
[ 2 -1],
[ 3 13],
[ 4 -1],
[ 5 -1],
[934 -1],
[ 6 16],
[ -1 955],
[ -1 17],
[ 8 18],
[ 9 -1],
[730 -1],
[241 -1],
[521 112]]
, where 'useful data' is at indices: 2, 5, 9, 12?
Initially I solved this, in dubious way:
import numpy as np
def combine(aa, bb, t):
    """Align two 1-D sequences so that paired elements share a column.

    Parameters
    ----------
    aa, bb : array_like
        The two 'eroded' sequences.
    t : array_like of shape (k, 2)
        Index pairs ``[i, j]`` meaning ``aa[i]`` matches ``bb[j]``. Pairs are
        assumed to be strictly increasing in both columns.

    Returns
    -------
    numpy.ndarray of shape (2, m)
        Row 0 holds the elements of ``aa``, row 1 those of ``bb``; positions
        with no counterpart contain ``-1``. Every paired element of ``aa``
        sits in the same column as its partner from ``bb``.

    Notes
    -----
    Between two consecutive pairs the unpaired elements of both inputs are
    kept; the shorter run is padded with ``-1`` in front so the next pair
    lines up. After the last pair the shorter tail is padded at the end.
    The earlier implementation tracked a running index offset that was
    updated with ``abs(x)``, which mis-aligned everything after a gap on the
    ``aa`` side, and it dropped unpaired elements when both inputs had some
    between two pairs. An empty ``t`` is accepted here.
    """
    aa = np.asarray(aa)
    bb = np.asarray(bb)
    row_a, row_b = [], []

    def _emit(seg_a, seg_b, pad_front):
        # Append two unpaired runs, padding the shorter one with -1.
        width = max(len(seg_a), len(seg_b))
        pad_a = [-1] * (width - len(seg_a))
        pad_b = [-1] * (width - len(seg_b))
        if pad_front:
            row_a.extend(pad_a + seg_a)
            row_b.extend(pad_b + seg_b)
        else:
            row_a.extend(seg_a + pad_a)
            row_b.extend(seg_b + pad_b)

    next_a = next_b = 0  # first index not yet emitted, per input
    for ia, ib in np.asarray(t, dtype=int).reshape(-1, 2):
        _emit(aa[next_a:ia].tolist(), bb[next_b:ib].tolist(), pad_front=True)
        # the matched pair itself
        row_a.append(aa[ia].item())
        row_b.append(bb[ib].item())
        next_a, next_b = ia + 1, ib + 1

    # whatever is left after the last pair
    _emit(aa[next_a:].tolist(), bb[next_b:].tolist(), pad_front=False)
    return np.array([row_a, row_b])
But below I suggest another solution.
It is difficult to understand exactly what the question wants, but if I understand correctly, we first need the number of columns of the expected array, i.e. the number of distinct values across the two arrays (np.union1d), and then an array of that size filled with -1 (np.full). Next, np.searchsorted gives the positions at which the values of one array would be inserted into the other, and np.in1d in invert mode selects the values that are not contained in the other array. Combining the two, we can reach the goal by indexing as follows:
# Align two SORTED 1-D arrays `a` and `b` on their union of values; positions
# where a value is missing from one array stay -1. The example outputs in the
# comments were produced with
#   a = [0 1 2 3 4 5 6 8 9]   and   b = [1 3 6 7 8].
# np.isin replaces np.in1d, which is deprecated in current NumPy; for 1-D
# inputs the two return the same mask.
union_ = np.union1d(a, b)
# [0 1 2 3 4 5 6 7 8 9]
res = np.full((2, union_.size), -1)
# [[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
# [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1]]
arange_row_ids = np.arange(union_.size)
# [0 1 2 3 4 5 6 7 8 9]
# columns of the union that hold values present only in b -> left empty in row 0
col_inds = np.searchsorted(a, b)[np.isin(b, a, invert=True)]
# np.searchsorted(a, b) ---> [1 3 6 7 7]
# np.isin(b, a, invert=True) ---> [False False False True False]
# [7]
# each earlier gap shifts the later insertion points by one, hence + arange
res[0, np.delete(arange_row_ids, col_inds + np.arange(col_inds.size))] = a
# np.delete(arange_row_ids, col_inds + np.arange(col_inds.size)) ---> [0 1 2 3 4 5 6 8 9]
# [[ 0 1 2 3 4 5 6 -1 8 9]
# [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1]]
# same again with the roles of a and b swapped, filling row 1
col_inds = np.searchsorted(b, a)[np.isin(a, b, invert=True)]
# np.searchsorted(b, a) ---> [0 0 1 1 2 2 2 4 5]
# np.isin(a, b, invert=True) ---> [ True False True False True True False False True]
# [0 1 2 2 5]
res[1, np.delete(arange_row_ids, col_inds + np.arange(col_inds.size))] = b
# np.delete(arange_row_ids, col_inds + np.arange(col_inds.size)) ---> [1 3 6 7 8]
# [[ 0 1 2 3 4 5 6 -1 8 9]
# [-1 1 -1 3 -1 -1 6 7 8 -1]]
The question is not clear enough for me to tell whether this is the expected result, but I think it is a useful starting point that can be adapted as needed.
Here's a partially vectorized solution:
import numpy as np
# Adapted from Divakar's answer at
# https://stackoverflow.com/questions/38619143/convert-python-sequence-to-numpy-array-filling-missing-values
def boolean_indexing(v, fill=-1, dtype=int):
    """Stack ragged sequences into a 2-D array, right-aligned and padded.

    Parameters
    ----------
    v : sequence of sequences
        Rows of possibly different lengths.
    fill : scalar, optional
        Padding value for the unused leading cells (default ``-1``).
    dtype : data-type, optional
        dtype of the result (default ``int``). Pass e.g. ``'<U1'`` with
        ``fill='-'`` for single characters, or ``float`` for floats.

    Returns
    -------
    numpy.ndarray of shape (len(v), longest row)
        Row ``k`` ends with the items of ``v[k]``; the cells in front of
        them hold ``fill``. An empty ``v`` yields a ``(0, 0)`` array.
    """
    lens = np.array([len(item) for item in v], dtype=int)
    if lens.size == 0:
        # lens.max() would raise on an empty input
        return np.empty((0, 0), dtype=dtype)
    # Reversed arange => True cells are flush right, so padding goes in front.
    mask = lens[:, None] > np.arange(lens.max())[::-1]
    out = np.full(mask.shape, fill, dtype=dtype)
    if mask.any():
        out[mask] = np.concatenate([np.asarray(item, dtype=dtype) for item in v])
    return out
# Sample input: the two eroded arrays and the table of matching indices.
a = np.array([0, 1, 313, 2, 3, 4, 5, 934, 6, 8, 9, 730, 241, 521])
b = np.array([112, 514, 11, 13, 16, 955, 17, 18, 112])
cor_tab = np.array([[1,2], [4,3], [8,4], [9,7]])
# Cut each array right after every paired index, so that each chunk (except
# the trailing one) ends with a paired element.
aa = np.split(a, cor_tab[:,0]+1)
bb = np.split(b, cor_tab[:,1]+1)
# Pad the two chunks of every segment to a common length; boolean_indexing
# returns a 2-row array whose rows are the padded chunk of a and of b.
padded = [boolean_indexing([left, right]) for left, right in zip(aa, bb)]
# Glue the per-segment rows back together into one flat array per input ...
aaa = np.concatenate([pair[0] for pair in padded])
bbb = np.concatenate([pair[1] for pair in padded])
# ... and present them as two columns.
ccc = np.column_stack((aaa, bbb))
In case of other types of data, here is another example. Lets take two arrays of letters:
a = np.array(['y', 'w', 'a', 'e', 'i', 'o', 'u', 'y', 'w', 'a', 'e', 'i', 'o', 'u'])
b = np.array(['t', 'h', 'b', 't', 'c', 'n', 's', 'j', 'p', 'z', 'n', 'h', 't', 's', 'm', 'p'])
, and index pairing array:
cor_tab = np.array([[2,0], [3,2], [4,3], [5,5], [6,6], [9,10], [11,12], [13,13]])
np.column_stack((a[cor_tab[:,0]], b[cor_tab[:,1]]))
array([['a', 't'], # useful data
['e', 'b'],
['i', 't'],
['o', 'n'],
['u', 's'],
['a', 'n'],
['i', 't'],
['u', 's']], dtype='<U1')
The only correction required is dtype='<U1' in boolean_indexing(). Result is:
[['y' '-'],
['w' '-'],
['a' 't'],
['-' 'h'],
['e' 'b'],
['i' 't'],
['-' 'c'],
['o' 'n'],
['u' 's'],
['-' 'j'],
['y' 'p'],
['w' 'z'],
['a' 'n'],
['e' 'h'],
['i' 't'],
['o' '-'],
['u' 's'],
['-' 'm'],
['-' 'p']]
It works for floats as well if change dtype in boolean_indexing() to float.

How to convert two bytes to floating point number

I have some legacy files that need to be mined for data. The files were created by Lotus 1-2-3 Release 4 for DOS. I'm trying to read the files faster by parsing the bytes directly rather than using Lotus to open the files.
Dim fileBytes() As Byte = My.Computer.FileSystem.ReadAllBytes(fiPath)
'I loop through all the data getting first/second bytes for each value
do ...
Dim FirstByte As Int16 = Convert.ToInt16(fileBytes(Index))
Dim SecondByte As Int16 = Convert.ToInt16(fileBytes(Index + 1))
loop ...
I can get integer values like this:
Dim value As Int16 = BitConverter.ToInt16(fileBytes, Index + 8) / 2
But floating numbers are more complicated. Only the smaller numbers are stored with two bytes. Larger values take 10 bytes, but that's another question. Here we only have smaller values with two bytes. Here are some sample values. I entered the byte values into Excel and use the =DEC2BIN() to convert to binary adding zeros on the left as needed to get 8 bits.
First Second
Byte Byte Value First Byte 2nd Byte
7 241 = -1.2 0000 0111 1111 0001
254 255 = -1 1111 1110 1111 1111
9 156 = -0.8 0000 1001 1001 1100
9 181 = -0.6 0000 1001 1011 0101
9 206 = -0.4 0000 1001 1100 1110
9 231 = -0.2 0000 1001 1110 0111
13 0 = 0 0000 1101 0000 0000
137 12 = 0.1 1000 1001 0000 1100
9 25 = 0.2 0000 1001 0001 1001
137 37 = 0.3 1000 1001 0010 0101
9 50 = 0.4 0000 1001 0011 0010
15 2 = 0.5 0000 1111 0000 0010
9 75 = 0.6 0000 1001 0100 1011
137 87 = 0.7 1000 1001 0101 0111
9 100 = 0.8 0000 1001 0110 0100
137 112 = 0.9 1000 1001 0111 0000
2 0 = 1 0000 0010 0000 0000
199 13 = 1.1 1100 0111 0000 1101
7 15 = 1.2 0000 0111 0000 1111
71 16 = 1.3 0100 0111 0001 0000
135 17 = 1.4 1000 0111 0001 0001
15 6 = 1.5 0000 1111 0000 0110
7 20 = 1.6 0000 0111 0001 0100
71 21 = 1.7 0100 0111 0001 0101
135 22 = 1.8 1000 0111 0001 0110
199 23 = 1.9 1100 0111 0001 0111
4 0 = 2 0000 0100 0000 0000
I'm hoping for a simple conversion method. Or maybe it'll be more complicated.
I looked at BCD: "BCD was used in many early decimal computers, and is implemented in the instruction set of machines such as the IBM System/360 series" and Intel BCD opcode
I do not know whether this is BCD or some other encoding. How do I convert the two bytes into a floating-point number?
I used the information from the website pointed out by Andrew Morton in comments. Basically the stored 16-bit quantity consists of either a 15-bit two's complement integer (when the lsb is 0) or a 12-bit two's complement integer plus a processing code indicating a scale factor to be applied to that integer (when the lsb is 1). I am not familiar with vb.net so am providing ISO-C code here. Program below successfully decodes all the data provided in the question.
Note: I am converting to an 8-byte double in code below, while the question suggests that the original conversion may have been to a 10-byte long double format (the 80-bit extended-precision format of the 8087 math coprocessor). It would seem like a good idea to try more test data to achieve full coverage of the eight scaling codes: Large integers like 1,000,000 and 1,000,000,000; decimal fractions like 0.0003, 0.000005, and 0.00000007; and binary fractions like 0.125 (1/8) and 0.046875 (3/64).
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* One stored 16-bit value, split into the two bytes as they appear in the
 * file. main() reassembles them as byte2 * 256 + byte1, i.e. byte1 is the
 * low-order byte. */
typedef struct {
uint8_t byte1;
uint8_t byte2;
} num;
/* Sample byte pairs from the question, in the order of its table
 * (-1.2, -1, -0.8, ... 1.9, 2). */
num data[] =
{
{ 7, 241}, {254, 255}, { 9, 156}, { 9, 181}, { 9, 206}, { 9, 231},
{ 13, 0}, {137, 12}, { 9, 25}, {137, 37}, { 9, 50}, { 15, 2},
{ 9, 75}, {137, 87}, { 9, 100}, {137, 112}, { 2, 0}, {199, 13},
{ 7, 15}, { 71, 16}, {135, 17}, { 15, 6}, { 7, 20}, { 71, 21},
{135, 22}, {199, 23}, { 4, 0}
};
/* Number of entries in data[]. */
int data_count = sizeof (data) / sizeof (data[0]);
/* define operators that may look more familiar to vb.net programmers */
#define XOR ^
#define MOD %
/*
 * Decode one 16-bit word of the compact number format described above.
 *
 *   h     : the stored word (second byte * 256 + first byte).
 *   n_out : if non-NULL, receives the sign-extended integer field before
 *           any scaling is applied.
 *
 * Returns the decoded value. When bit 0 of h is clear, bits 15..1 hold a
 * 15-bit two's-complement integer. When bit 0 is set, bits 3..1 select one
 * of eight scale factors and bits 15..4 hold a 12-bit two's-complement
 * integer that is multiplied or divided accordingly.
 */
static double decode_word (uint16_t h, int32_t *n_out)
{
    int32_t n;
    double r;

    if ((h % 2) == 1) {
        /* scaling code in h<3:1> */
        const unsigned int code = (h / 2u) % 8u;
        /* 12-bit integer in h<15:4>; XOR/subtract of the sign bit sign-extends it */
        n = (int32_t)((((uint32_t)h / 16) ^ 2048) - 2048);
        r = (double)n;
        switch (code) {
        case 0x0: r = r * 5000; break;
        case 0x1: r = r * 500; break;
        case 0x2: r = r / 20; break;
        case 0x3: r = r / 200; break;
        case 0x4: r = r / 2000; break;
        case 0x5: r = r / 20000; break;
        case 0x6: r = r / 16; break;
        case 0x7: r = r / 64; break;
        default: break; /* unreachable: code is always 0..7 */
        }
    } else {
        /* unscaled 15-bit integer in h<15:1>, sign-extended the same way */
        n = (int32_t)((((uint32_t)h / 2) ^ 16384) - 16384);
        r = (double)n;
    }
    if (n_out != NULL) {
        *n_out = n;
    }
    return r;
}

/* Decodes every sample in data[] and prints the bytes, the raw integer
 * field (hex) and the resulting value, one line per sample. */
int main (void)
{
    int i;

    for (i = 0; i < data_count; i++) {
        const uint8_t b1 = data[i].byte1;
        const uint8_t b2 = data[i].byte2;
        /* data word: first byte is the low-order byte */
        const uint16_t h = (uint16_t)(((uint16_t)b2 * 256) + b1);
        int32_t n;
        const double r = decode_word (h, &n);
        /* %x expects an unsigned argument, so convert explicitly */
        printf ("[%3d,%3d] n=%08x r=% 12.8f\n", b1, b2, (unsigned int)n, r);
    }
    return EXIT_SUCCESS;
}
The output of this program is as follows:
[ 7,241] n=ffffff10 r= -1.20000000
[254,255] n=ffffffff r= -1.00000000
[ 9,156] n=fffff9c0 r= -0.80000000
[ 9,181] n=fffffb50 r= -0.60000000
[ 9,206] n=fffffce0 r= -0.40000000
[ 9,231] n=fffffe70 r= -0.20000000
[ 13, 0] n=00000000 r= 0.00000000
[137, 12] n=000000c8 r= 0.10000000
[ 9, 25] n=00000190 r= 0.20000000
[137, 37] n=00000258 r= 0.30000000
[ 9, 50] n=00000320 r= 0.40000000
[ 15, 2] n=00000020 r= 0.50000000
[ 9, 75] n=000004b0 r= 0.60000000
[137, 87] n=00000578 r= 0.70000000
[ 9,100] n=00000640 r= 0.80000000
[137,112] n=00000708 r= 0.90000000
[ 2, 0] n=00000001 r= 1.00000000
[199, 13] n=000000dc r= 1.10000000
[ 7, 15] n=000000f0 r= 1.20000000
[ 71, 16] n=00000104 r= 1.30000000
[135, 17] n=00000118 r= 1.40000000
[ 15, 6] n=00000060 r= 1.50000000
[ 7, 20] n=00000140 r= 1.60000000
[ 71, 21] n=00000154 r= 1.70000000
[135, 22] n=00000168 r= 1.80000000
[199, 23] n=0000017c r= 1.90000000
[ 4, 0] n=00000002 r= 2.00000000
Just a VB.Net translation of the C code posted by njuffa.
The original structure has been substituted with a Byte array and the numeric data type adapted to .Net types. That's all.
' Sample (first byte, second byte) pairs from the question's table.
Dim samples As Byte(,) = New Byte(,) {
    {7, 241}, {254, 255}, {9, 156}, {9, 181}, {9, 206}, {9, 231}, {13, 0}, {137, 12}, {9, 25},
    {137, 37}, {9, 50}, {15, 2}, {9, 75}, {137, 87}, {9, 100}, {137, 112}, {2, 0}, {199, 13},
    {7, 15}, {71, 16}, {135, 17}, {15, 6}, {7, 20}, {71, 21}, {135, 22}, {199, 23}, {4, 0}
}

' Decode each pair and print the bytes, the raw integer field and the value.
For row As Integer = 0 To samples.GetLength(0) - 1
    Dim lowByte As Byte = samples(row, 0)
    Dim highByte As Byte = samples(row, 1)
    ' 16-bit word: the second byte is the high-order byte.
    Dim raw As UShort = (highByte * 256US) + lowByte
    Dim mantissa As Integer
    Dim value As Double

    If (raw Mod 2) = 0 Then
        ' Bit 0 clear: unscaled 15-bit integer in bits 15..1.
        ' Xor-ing and then subtracting the sign bit sign-extends it.
        mantissa = ((raw \ 2) Xor 16384) - 16384
        value = mantissa
    Else
        ' Bit 0 set: bits 3..1 select the scale, bits 15..4 hold a 12-bit integer.
        Dim scaleCode As UShort = (raw \ 2US) Mod 8US
        mantissa = ((raw \ 16) Xor 2048) - 2048
        Select Case scaleCode
            Case 0 : value = mantissa * 5000
            Case 1 : value = mantissa * 500
            Case 2 : value = mantissa / 20
            Case 3 : value = mantissa / 200
            Case 4 : value = mantissa / 2000
            Case 5 : value = mantissa / 20000
            Case 6 : value = mantissa / 16
            Case 7 : value = mantissa / 64
        End Select
    End If

    Console.WriteLine($"[{lowByte,3:D}, {highByte,3:D}] number = {mantissa:X8} result ={value,12:F8}")
Next

Weird behavior of multiply in tensorflow

I am trying to use multiply in my program, but the behavior of this op looks abnormal: it seems to be calculating wrong results. Minimal example:
import tensorflow as tf

batchSize = 2
maxSteps = 3
max_cluster_size = 4

# x is a Variable: its random initial value is drawn once, by `init`.
x = tf.Variable(tf.random_uniform(dtype=tf.int32, maxval=20, shape=[batchSize, maxSteps, max_cluster_size]))
# y is built directly on a random op, so it is re-sampled on EVERY sess.run
# that evaluates it (directly, or indirectly through z).
y = tf.sequence_mask(tf.random_uniform(minval=1, maxval=max_cluster_size-1, dtype=tf.int32, shape=[batchSize, maxSteps]), maxlen=max_cluster_size)
y = tf.cast(y, tf.int32)
z = tf.multiply(x, y)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Fetch all three tensors in ONE run call so that z_v is computed from
    # the very same y that is returned as y_v. Separate sess.run(y) and
    # sess.run(z) calls each draw a fresh random mask, which made the printed
    # y and z look inconsistent.
    x_v, y_v, z_v = sess.run([x, y, z])

print(x_v.shape)
print(x_v)
print('----------------------------')
print(y_v.shape)
print(y_v)
print('----------------------------')
print(z_v.shape)
print(z_v)
print('----------------------------')
Result:
(2, 3, 4)
[[[ 7 12 19 3]
[10 18 15 7]
[18 9 2 7]]
[[ 4 5 16 1]
[ 2 14 15 14]
[ 5 18 8 18]]]
----------------------------
(2, 3, 4)
[[[1 1 0 0]
[1 0 0 0]
[1 1 0 0]]
[[1 1 0 0]
[1 1 0 0]
[1 1 0 0]]]
----------------------------
(2, 3, 4)
[[[ 7 12 0 0]
[10 0 0 0]
[18 0 0 0]]
[[ 4 5 0 0]
[ 2 0 0 0]
[ 5 0 0 0]]]
----------------------------
Where z_v is expected to be:
[[[ 7 12 0 0]
[10 0 0 0]
[18 9 0 0]]
[[ 4 5 0 0]
[ 2 14 0 0]
[ 5 18 0 0]]]
When I test multiply in other programs, it goes just fine.
I suspect that this may be related to x and y are random variables. Anyone give a hint on this?
Instead of these lines:
x_v = sess.run(x)
y_v = sess.run(y)
z_v = sess.run(z)
you need to use this:
x_v, y_v, z_v = sess.run( [ x, y, z ] )
With the first, separate version, basically what ends up happening is that you create x_v, and then y_v, but when you run the sess.run(z) it will recalculate z's dependencies as well, so you end up seeing the output from different x's and y's than you print.

how to write test for breaking down (RTP depayload or parsed) JPEG frame into RTP (RFC 2435)

I am fairly new to sockets and RTP and am still learning. Basically, I want to write a program that can send a JPEG (ultimately MJPEG) over RTP using UDP.
I already have the UDP part working and am able to send files or a stream to a local address (127.0.0.1) using OpenCV, so that is not the problem.
However, when I try to pass on the frame captured through OpenCV's VideoCapture, it does not work. Or rather, to be exact, I have no idea how to do it.
So now, i want to break this problem down and decide to test only on the RTP side.
So as you can see in my main function, i tried to test out the functions but it doesn't look like it works.
Can someone point me to the right direction of how to properly test those function, specially the sendFrame one ?
This code mainly just copy and paste from RFC2435 document.
Thanks
/*
 * Table K.1 from JPEG spec.
 *
 * 64 base quantizer values for the luminance component. MakeTables() scales
 * each entry by the Q factor to produce the table actually used.
 * (Stray markdown backticks before and after this definition were removed;
 * they prevented the file from compiling.)
 */
static const int jpeg_luma_quantizer[64] = {
    16, 11, 10, 16, 24, 40, 51, 61,
    12, 12, 14, 19, 26, 58, 60, 55,
    14, 13, 16, 24, 40, 57, 69, 56,
    14, 17, 22, 29, 51, 87, 80, 62,
    18, 22, 37, 56, 68, 109, 103, 77,
    24, 35, 55, 64, 81, 104, 113, 92,
    49, 64, 78, 87, 103, 121, 120, 101,
    72, 92, 95, 98, 112, 100, 103, 99
};
/*
 * Table K.2 from JPEG spec.
 *
 * 64 base quantizer values for the chrominance components. MakeTables()
 * scales each entry by the Q factor and clamps the result to 1..255.
 */
static const int jpeg_chroma_quantizer[64] = {
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
int main(int argc, char * argv[]) {
//setup openCV
cvNamedWindow("UDP Video Sender", CV_WINDOW_AUTOSIZE);
CvCapture* capture = cvCreateCameraCapture(0);
if(!capture){
std::cout<<"No camera found."<< std::endl;
goto DONE;
}
IplImage *frame;
frame = cvQueryFrame(capture);
IplImage *small = cvCreateImage(cvSize(frame->width / 2, frame->height / 2),
frame->depth, 3);
while(1){
//capture frame and resize
frame = cvQueryFrame(capture);
cvResize(frame, small, CV_INTER_LINEAR);
cvShowImage("UDP Video Sender", small);
//MakeHeaders(filename,0,5,5,0,128,0);
//MakeTables(128,frame,0);
//MakeDRIHeader();
// MakeHuffmanHeader();
MakeHuffmanHeader(128,1024,1024,uchar *lum_ac_symbols[],1024,1,1 );
//MakeQuantHeader();
DONE:
cout<<"Press any key to continue."<<endl;
}
}
/*
 * Derive the luminance and chrominance quantization tables for a Q factor.
 *
 *   q   : quality factor; values outside 1..99 are treated as 1 or 99.
 *   lqt : receives 64 luminance quantizers.
 *   cqt : receives 64 chrominance quantizers.
 *
 * Every base table entry is scaled by a percentage derived from q
 * (5000/q below 50, 200 - 2*q otherwise), rounded, and clamped to 1..255.
 */
void
MakeTables(int q, u_char *lqt, u_char *cqt)
{
    int factor = q;
    if (factor < 1)
        factor = 1;
    else if (factor > 99)
        factor = 99;

    /* Scale in percent; the branch is chosen from the caller's q. */
    const int scale = (q < 50) ? (5000 / factor) : (200 - factor * 2);

    for (int idx = 0; idx < 64; ++idx) {
        const int luma = (jpeg_luma_quantizer[idx] * scale + 50) / 100;
        const int chroma = (jpeg_chroma_quantizer[idx] * scale + 50) / 100;

        /* Limit the quantizers to 1 <= q <= 255 */
        lqt[idx] = (luma < 1) ? 1 : ((luma > 255) ? 255 : luma);
        cqt[idx] = (chroma < 1) ? 1 : ((chroma > 255) ? 255 : chroma);
    }
}
/**
 * The following tables and routines can be used to create the JPEG marker
 * segments corresponding to the table-specification data that is absent
 * from the RTP/JPEG body.
 *
 * MakeHeaders() passes each codelens/symbols pair to MakeHuffmanHeader(),
 * which copies both arrays verbatim into a DHT segment.
 */
/* Luminance DC table: 16 code-length entries. Presumably the number of
 * codes of each bit length 1..16 -- the entries sum to 12, the size of
 * lum_dc_symbols. */
u_char lum_dc_codelens[] = {
0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
};
/* Luminance DC table: the 12 symbol values. */
u_char lum_dc_symbols[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
};
/* Luminance AC table: 16 code-length entries; they sum to 162, the size of
 * lum_ac_symbols. */
u_char lum_ac_codelens[] = {
0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d,
};
/* Luminance AC table: the 162 symbol values. */
u_char lum_ac_symbols[] = {
0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa,
};
/* Chrominance DC table: 16 code-length entries. Presumably the number of
 * codes of each bit length 1..16 -- the entries sum to 12, the size of
 * chm_dc_symbols. MakeHeaders() emits this pair as Huffman table 1,
 * class 0. */
u_char chm_dc_codelens[] = {
0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
/* Chrominance DC table: the 12 symbol values. */
u_char chm_dc_symbols[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
};
/* Chrominance AC table: 16 code-length entries; they sum to 162, the size
 * of chm_ac_symbols. MakeHeaders() emits this pair as Huffman table 1,
 * class 1. */
u_char chm_ac_codelens[] = {
0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77,
};
/* Chrominance AC table: the 162 symbol values. */
u_char chm_ac_symbols[] = {
0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa,
};
/*
 * Write a DQT (define quantization table) segment at p.
 *
 *   p       : output position; 69 bytes are written.
 *   qt      : the 64 quantizer values to embed.
 *   tableNo : table identifier stored in the byte after the length field.
 *
 * Returns the position just past the segment (p + 69).
 */
u_char *
MakeQuantHeader(u_char *p, u_char *qt, int tableNo)
{
    enum { kPrefixLen = 5, kTableLen = 64 };

    p[0] = 0xff;
    p[1] = 0xdb;            /* DQT */
    p[2] = 0;               /* length msb */
    p[3] = 67;              /* length lsb: 2 length bytes + id + 64 entries */
    p[4] = tableNo;
    memcpy(p + kPrefixLen, qt, kTableLen);
    return p + kPrefixLen + kTableLen;
}
/*
 * Write a DHT (define Huffman table) segment at p.
 *
 *   p          : output position; 5 + ncodes + nsymbols bytes are written.
 *   codelens   : ncodes bytes copied right after the class/id byte.
 *   ncodes     : number of bytes in codelens.
 *   symbols    : nsymbols bytes copied after codelens.
 *   nsymbols   : number of bytes in symbols.
 *   tableNo    : table identifier (low nibble of the class/id byte).
 *   tableClass : table class (high nibble of the class/id byte).
 *
 * Returns the position just past the segment.
 *
 * The 16-bit segment length is now written in full. Previously the high
 * byte was hard-coded to 0 and the low byte silently truncated, which
 * produced a corrupt segment as soon as 3 + ncodes + nsymbols exceeded 255.
 */
u_char *
MakeHuffmanHeader(u_char *p, u_char *codelens, int ncodes,
                  u_char *symbols, int nsymbols, int tableNo,
                  int tableClass)
{
    /* length field counts itself (2) + class/id byte (1) + both tables */
    const int seg_len = 3 + ncodes + nsymbols;

    *p++ = 0xff;
    *p++ = 0xc4;                    /* DHT */
    *p++ = (seg_len >> 8) & 0xff;   /* length msb */
    *p++ = seg_len & 0xff;          /* length lsb */
    *p++ = (tableClass << 4) | tableNo;
    memcpy(p, codelens, ncodes);
    p += ncodes;
    memcpy(p, symbols, nsymbols);
    p += nsymbols;
    return (p);
}
/*
 * Write a DRI (define restart interval) segment at p.
 *
 *   p   : output position; 6 bytes are written.
 *   dri : restart interval, stored high byte first.
 *
 * Returns the position just past the segment (p + 6).
 */
u_char *
MakeDRIHeader(u_char *p, u_short dri) {
    p[0] = 0xff;
    p[1] = 0xdd;            /* DRI */
    p[2] = 0x0;             /* length msb */
    p[3] = 4;               /* length lsb */
    p[4] = dri >> 8;        /* dri msb */
    p[5] = dri & 0xff;      /* dri lsb */
    return p + 6;
}
/*
 * Arguments:
 * type, width, height: as supplied in RTP/JPEG header
 * (width and height are in 8-pixel blocks; they are
 * converted to pixels below)
 * lqt, cqt: quantization tables as either derived from
 * the Q factor using MakeTables() or as specified
 * in section 4.2.
 * dri: restart interval in MCUs, or 0 if no restarts.
 *
 * p: pointer to return area. The caller must provide enough room for
 * everything written here; with the Huffman tables defined in this
 * file that is 605 bytes, or 611 when dri != 0.
 *
 * Return value:
 * The length of the generated headers.
 *
 * Generate a frame and scan headers that can be prepended to the
 * RTP/JPEG data payload to produce a JPEG compressed image in
 * interchange format (except for possible trailing garbage and
 * absence of an EOI marker to terminate the scan).
 *
 * Segments are emitted in this order: SOI, DQT (luminance, table 0),
 * DQT (chrominance, table 1), optional DRI, SOF, four DHT segments,
 * SOS.
 */
int MakeHeaders(u_char *p, int type, int w, int h, u_char *lqt,
u_char *cqt, u_short dri)
{
/* remembered so the total length can be computed at the end */
u_char *start = p;
/* convert from blocks to pixels */
w <<= 3;
h <<= 3;
*p++ = 0xff;
*p++ = 0xd8; /* SOI */
p = MakeQuantHeader(p, lqt, 0);
p = MakeQuantHeader(p, cqt, 1);
/* the DRI segment is only present when restart markers are in use */
if (dri != 0)
p = MakeDRIHeader(p, dri);
*p++ = 0xff;
*p++ = 0xc0; /* SOF */
*p++ = 0; /* length msb */
*p++ = 17; /* length lsb */
*p++ = 8; /* 8-bit precision */
*p++ = h >> 8; /* height msb */
*p++ = h; /* height lsb */
*p++ = w >> 8; /* width msb */
*p++ = w; /* width lsb */
*p++ = 3; /* number of components */
*p++ = 0; /* comp 0 */
/* only component 0's sampling factors depend on the RTP/JPEG type */
if (type == 0)
*p++ = 0x21; /* hsamp = 2, vsamp = 1 */
else
*p++ = 0x22; /* hsamp = 2, vsamp = 2 */
*p++ = 0; /* quant table 0 */
*p++ = 1; /* comp 1 */
*p++ = 0x11; /* hsamp = 1, vsamp = 1 */
*p++ = 1; /* quant table 1 */
*p++ = 2; /* comp 2 */
*p++ = 0x11; /* hsamp = 1, vsamp = 1 */
*p++ = 1; /* quant table 1 */
/* Huffman tables: (table 0, class 0/1) from the lum_* arrays,
 * (table 1, class 0/1) from the chm_* arrays */
p = MakeHuffmanHeader(p, lum_dc_codelens,
sizeof(lum_dc_codelens),
lum_dc_symbols,
sizeof(lum_dc_symbols), 0, 0);
p = MakeHuffmanHeader(p, lum_ac_codelens,
sizeof(lum_ac_codelens),
lum_ac_symbols,
sizeof(lum_ac_symbols), 0, 1);
p = MakeHuffmanHeader(p, chm_dc_codelens,
sizeof(chm_dc_codelens),
chm_dc_symbols,
sizeof(chm_dc_symbols), 1, 0);
p = MakeHuffmanHeader(p, chm_ac_codelens,
sizeof(chm_ac_codelens),
chm_ac_symbols,
sizeof(chm_ac_symbols), 1, 1);
*p++ = 0xff;
*p++ = 0xda; /* SOS */
*p++ = 0; /* length msb */
*p++ = 12; /* length lsb */
*p++ = 3; /* 3 components */
*p++ = 0; /* comp 0 */
*p++ = 0; /* huffman table 0 */
*p++ = 1; /* comp 1 */
*p++ = 0x11; /* huffman table 1 */
*p++ = 2; /* comp 2 */
*p++ = 0x11; /* huffman table 1 */
*p++ = 0; /* first DCT coeff */
*p++ = 63; /* last DCT coeff */
*p++ = 0; /* successive approx. */
return (p - start);
};
/*
 * RTP data header from RFC1889
 *
 * NOTE(review): the first six members are bit-fields. The order in which a
 * compiler allocates bit-fields within a storage unit is
 * implementation-defined, so confirm that this layout matches the RTP wire
 * format on the target compiler/byte order before copying the struct
 * directly into a packet.
 */
typedef struct {
unsigned int version:2; /* protocol version */
unsigned int p:1; /* padding flag */
unsigned int x:1; /* header extension flag */
unsigned int cc:4; /* CSRC count */
unsigned int m:1; /* marker bit */
unsigned int pt:7; /* payload type */
u_int16_t seq; /* sequence number */
u_int32_t ts; /* timestamp */
u_int32_t ssrc; /* synchronization source */
u_int32_t csrc[1]; /* optional CSRC list */
} rtp_hdr_t;
/* Size in bytes of the fixed part of the header: the fields up to and
 * including ssrc (16 bits of bit-fields + seq + ts + ssrc); the csrc list
 * is not counted. */
#define RTP_HDR_SZ 12
/* The following definition is from RFC1890 */
#define RTP_PT_JPEG 26
struct jpeghdr {
unsigned int tspec:8; /* type-specific field */
unsigned int off:24; /* fragment byte offset */
u_int8_t type; /* id of jpeg decoder params */
u_int8_t q; /* quantization factor (or table id) */
u_int8_t width; /* frame width in 8 pixel blocks */
u_int8_t height; /* frame height in 8 pixel blocks */
};
struct jpeghdr_rst {
u_int16_t dri;
unsigned int f:1;
unsigned int l:1;
unsigned int count:14;
};
struct jpeghdr_qtable {
u_int8_t mbz;
u_int8_t precision;
u_int16_t length;
};
/* Flag OR-ed into the JPEG header's type field by SendFrame when dri != 0,
 * i.e. when the scan data contains restart markers. */
#define RTP_JPEG_RESTART 0x40
/* Procedure SendFrame:
 *
 * Splits one frame of JPEG scan data into RTP/JPEG packets of at most
 * PACKET_SIZE bytes and sends each packet on sock[0].
 *
 * Arguments:
 * start_seq: The sequence number for the first packet of the current
 *            frame.
 * ts:        RTP timestamp for the current frame
 * ssrc:      RTP SSRC value
 * jpeg_data: Huffman encoded JPEG scan data
 * len:       Length of the JPEG scan data (nothing is sent if len <= 0)
 * type:      The value the RTP/JPEG type field should be set to
 * typespec:  The value the RTP/JPEG type-specific field should be set
 *            to
 * width:     The width in pixels of the JPEG image
 * height:    The height in pixels of the JPEG image
 * dri:       The number of MCUs between restart markers (or 0 if there
 *            are no restart markers in the data)
 * q:         The Q factor of the data, to be specified using the
 *            Independent JPEG group's algorithm if 1 <= q <= 99,
 *            specified explicitly with lqt and cqt if q >= 128, or
 *            undefined otherwise.
 * lqt:       The quantization table for the luminance channel if
 *            q >= 128 (64 bytes, must not be NULL in that case)
 * cqt:       The quantization table for the chrominance channels if
 *            q >= 128 (64 bytes, must not be NULL in that case)
 *
 * Return value:
 * the sequence number to be sent for the first packet of the next
 * frame. If q >= 128 but lqt or cqt is NULL, nothing is sent and
 * start_seq is returned unchanged.
 *
 * The following are assumed to be defined:
 *
 * PACKET_SIZE - The size of the outgoing packet
 * sock[0]     - descriptor passed to send(); presumably a connected
 *               datagram socket set up elsewhere in this file
 *
 * All headers are written byte by byte in network byte order instead of
 * being memcpy'd from bit-field structs: bit-field layout is
 * implementation-defined, and pushing the 24-bit fragment offset through
 * htonl()/ntohl() drops its low byte on little-endian hosts.
 *
 * Only the low 24 bits of the fragment offset fit in the header, so frames
 * of 16 MiB or more cannot be represented.
 */
#define PACKET_SIZE 512

/* Stores value at dst in big-endian order; returns the byte after it. */
static u_int8_t *PutBE16(u_int8_t *dst, u_int16_t value) {
    *dst++ = (u_int8_t)(value >> 8);
    *dst++ = (u_int8_t)(value & 0xff);
    return dst;
}

/* Stores value at dst in big-endian order; returns the byte after it. */
static u_int8_t *PutBE32(u_int8_t *dst, u_int32_t value) {
    *dst++ = (u_int8_t)(value >> 24);
    *dst++ = (u_int8_t)((value >> 16) & 0xff);
    *dst++ = (u_int8_t)((value >> 8) & 0xff);
    *dst++ = (u_int8_t)(value & 0xff);
    return dst;
}

u_int16_t SendFrame(u_int16_t start_seq, u_int32_t ts, u_int32_t ssrc,
                    u_int8_t *jpeg_data, int len, u_int8_t type,
                    u_int8_t typespec, int width, int height, int dri,
                    u_int8_t q, u_int8_t *lqt, u_int8_t *cqt) {
    u_int8_t packet_buf[PACKET_SIZE]; /* automatic storage: must NOT be free()d */
    u_int8_t *ptr;
    u_int16_t seq = start_seq;
    u_int32_t off = 0;                /* byte offset of this fragment in jpeg_data */
    int bytes_left = len;
    int hdr_len, data_len, is_last;

    /* Explicit tables are copied into the first packet, so they must exist. */
    if (q >= 128 && (lqt == NULL || cqt == NULL)) {
        fprintf(stderr, "SendFrame: q >= 128 requires lqt and cqt\n");
        return start_seq;
    }

    while (bytes_left > 0) {
        /* Leave room for the RTP header; it is filled in once we know
         * whether this is the last packet of the frame (marker bit). */
        ptr = packet_buf + RTP_HDR_SZ;

        /* Main JPEG header (8 bytes) */
        *ptr++ = typespec;
        *ptr++ = (u_int8_t)((off >> 16) & 0xff); /* 24-bit fragment offset */
        *ptr++ = (u_int8_t)((off >> 8) & 0xff);
        *ptr++ = (u_int8_t)(off & 0xff);
        *ptr++ = (u_int8_t)(type | ((dri != 0) ? RTP_JPEG_RESTART : 0));
        *ptr++ = q;
        *ptr++ = (u_int8_t)(width / 8);          /* in 8 pixel blocks */
        *ptr++ = (u_int8_t)(height / 8);

        /* Restart header: required in every packet once the type field
         * carries RTP_JPEG_RESTART. */
        if (dri != 0) {
            ptr = PutBE16(ptr, (u_int16_t)dri);
            /* F = 1, L = 1, count = 0x3fff: this code does not align RIs */
            ptr = PutBE16(ptr, 0xffff);
        }

        /* Quantization table header + tables: first packet of the frame
         * only, and only when the tables are specified explicitly. */
        if (q >= 128 && off == 0) {
            *ptr++ = 0;              /* mbz */
            *ptr++ = 0;              /* precision: 8 bit tables only */
            ptr = PutBE16(ptr, 128); /* 2 64-byte tables */
            memcpy(ptr, lqt, 64);
            ptr += 64;
            memcpy(ptr, cqt, 64);
            ptr += 64;
        }

        hdr_len = (int)(ptr - packet_buf);
        data_len = PACKET_SIZE - hdr_len;
        is_last = (data_len >= bytes_left);
        if (is_last)
            data_len = bytes_left;

        /* RTP fixed header (12 bytes) */
        packet_buf[0] = (u_int8_t)(2 << 6); /* version 2, p = 0, x = 0, cc = 0 */
        packet_buf[1] = (u_int8_t)((is_last ? 0x80 : 0) | RTP_PT_JPEG);
        PutBE16(packet_buf + 2, seq);
        PutBE32(packet_buf + 4, ts);
        PutBE32(packet_buf + 8, ssrc);

        memcpy(ptr, jpeg_data + off, data_len);

        /* A failed send is reported but does not abort the frame. */
        if (send(sock[0], packet_buf, hdr_len + data_len, 0) < 0)
            perror("hre");

        off += data_len;
        bytes_left -= data_len;
        seq++;
    }
    return seq;
}

Scilab - Legend ONLY for a specific set of functions

I would like to draw boundary regions using xfpoly and save them using xs2pdf. Then I want to plot two functions within those boundaries, add a legend for those functions only, and save the image again.
My code follows...
// Start from a clean state: clear variables, clear the console and close
// every open graphics window.
clear; clc; xdel(winsid());
// Sample points and the two curves to be plotted later.
t = -2:0.01:2;
x_1 = t.^2; x_2 = t.^4;
// Left boundary region, drawn as a grey filled polygon.
xfpoly([-3 -2 -2 -3], [0 0 16 16], color('grey'));
// Configure the current axes: keep what is already drawn, fix the visible
// range to x in [-3, 3] and y in [0, 3], and show the box.
ax = gca();
ax.auto_clear = 'off'; ax.data_bounds = [-3, 0; 3, 3];
ax.box = 'on';
ax.axes_visible = ['on','on','off']; ax.tight_limits = ['on','on','off'];
// Right and middle boundary regions.
xfpoly([2 3 3 2], [0 0 16 16], color('grey'));
xfpoly([-1 1 1 -1], [1 1 16 16], color('grey'));
// First export: boundaries only.
xs2pdf(gcf(), 'fig_1');
// Add the two curves and a legend for them.
plot2d(t, [x_1', x_2'], [color('green'), color('red')]);
legend(['t^2'; 't^4']);
// Take the current entity (expected to be the legend just created) and
// overwrite its text with three empty entries followed by the curve labels.
// There is no trailing ';', so the assigned value is echoed to the console.
leg_ent = gce();
leg_ent.text = ['';'';'';'t^2'; 't^4']
// Second export: boundaries, curves and legend.
xs2pdf(gcf(), 'fig_2');
Do you want something like this?
// Reset the workspace and the console.
clear;
clc;
// Sample points and the two curves.
t = -2:0.01:2;
x_1 = t.^2; x_2 = t.^4;
// Figure 0: curves, legend and boundaries.
scf(0);
clf(0);
//plot the curves first to make the legend easier
plot2d(t, [x_1', x_2'], [color('green'), color('red')]);
legend(['t^2'; 't^4']); //the first two elements are the curves, so no need to modify
// Keep the existing drawings, fix the visible range and show the box.
ax = gca();
ax.auto_clear = 'off';
ax.data_bounds = [-3, 0; 3, 3];
ax.box = 'on';
// Grey boundary regions drawn after the curves.
xfpoly([-3 -2 -2 -3], [0 0 3 3], color('grey'));
xfpoly([2 3 3 2], [0 0 3 3], color('grey'));
xfpoly([-1 1 1 -1], [1 1 3 3], color('grey'));
// Figure 1: the boundaries alone.
scf(1);
clf(1);
xfpoly([-3 -2 -2 -3], [0 0 3 3], color('grey')); //ymax should be 3, not 16
xfpoly([2 3 3 2], [0 0 3 3], color('grey'));
xfpoly([-1 1 1 -1], [1 1 3 3], color('grey'));
// Same axes settings as in figure 0.
ax = gca();
ax.auto_clear = 'off';
ax.data_bounds = [-3, 0; 3, 3];
ax.box = 'on';
Atilla's answer led me to this solution, which uses the pause command:
// Start from a clean state: clear variables, clear the console and close
// every open graphics window.
clear; clc; xdel(winsid());
// Sample points and the two curves.
t = -2:0.01:2;
x_1 = t.^2; x_2 = t.^4;
// Draw the curves and the legend first, keeping a handle to each so they
// can be hidden and shown again later.
plot2d(t, [x_1', x_2'], [color('green'), color('red')]); plot_1 = gce();
legend(['t^2'; 't^4']); leg_1 = gce();
// Hide curves and legend so the first export shows the boundaries only.
plot_1.visible = 'off'; leg_1.visible = 'off';
// Grey boundary regions.
xfpoly([-3 -2 -2 -3], [0 0 16 16], color('grey'));
xfpoly([2 3 3 2], [0 0 16 16], color('grey'));
xfpoly([-1 1 1 -1], [1 1 16 16], color('grey'));
ax = gca();
ax.box = 'on';
// First export: boundaries only.
xs2pdf(gcf(), 'fig_1');
// pause
// Make curves and legend visible again, then export the complete figure.
plot_1.visible = 'on'; leg_1.visible = 'on';
xs2pdf(gcf(), 'fig_2');