I am using the vulkan-tutorial code, which I modified to load a cubemap.
When I use VK_FORMAT_R8G8B8A8_UNORM, it works with this code:
// Cubemap upload, 8-bit RGBA variant: load six face images, pack them
// back-to-back into one staging buffer, then describe a 6-layer
// cube-compatible image and a cube image view over it.
unsigned char* pixelsArray[6];
for (int i = 0; i < 6; ++i)
{
    // Requested with STBI_rgb_alpha; the size math below assumes 4 channels per pixel.
    pixelsArray[i] = stbi_load(imageFileArray[i].c_str(), &texWidth, &texHeight, &texChannels, STBI_rgb_alpha);
}
// Bytes per face: width * height * 4 channels * 1 byte per channel.
// Widen to VkDeviceSize before multiplying so large faces cannot overflow int arithmetic.
VkDeviceSize size = static_cast<VkDeviceSize>(texWidth) * texHeight * 4;
// Bytes for all six faces.
VkDeviceSize allSize = size * 6;
VkBufferCreateInfo bufferInfo{};
...
bufferInfo.size = allSize ;
// Map the whole staging allocation (the identifier is `allSize`, not `AllSize`).
vkMapMemory(device, stagingBufferMemory, 0, allSize, 0, &data);
for(int i = 0; i < 6; ++i)
{
    // Face i lands at byte offset size * i, so the faces are contiguous in the buffer.
    memcpy( (char*) data + (size*i) , pixelsArray[i], static_cast<size_t>(size));
}
vkUnmapMemory(device, stagingBufferMemory);
// NOTE(review): the buffers returned by stbi_load are not released in this snippet;
// confirm they are freed elsewhere once the copy is done.
VkImageCreateInfo imageInfo{};
...
imageInfo.arrayLayers = 6;
imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
imageInfo.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
VkImageViewCreateInfo viewInfo{};
...
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
viewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
viewInfo.subresourceRange.layerCount = 6;
But when I try VK_FORMAT_R16G16B16A16_SFLOAT, the display is distorted and there is no validation error. This is the code:
// Cubemap upload attempt with a 16-bit float image format (the variant the
// question reports as producing a distorted image).
float* pixelsArray[6];
for (int i = 0; i < 6; ++i)
{
// Each face is loaded as an array of `float` values; STBI_rgb_alpha is requested
// and the size math below assumes 4 channels per pixel.
pixelsArray[i] = stbi_loadf(imageFileArray[i].c_str(), &texWidth, &texHeight, &texChannels, STBI_rgb_alpha);
}
// Both sizes budget 2 bytes per channel (the trailing "* 2").
VkDeviceSize allSize = texWidth * texHeight * 4 * 6 * 2;// I added *2
VkDeviceSize size = texWidth * texHeight * 4 * 2;// I added *2
VkBufferCreateInfo bufferInfo{};
...
bufferInfo.size = allSize ;
// NOTE(review): `AllSize` differs in case from `allSize` declared above —
// presumably a transcription typo; confirm.
vkMapMemory(device, stagingBufferMemory, 0, AllSize, 0, &data);
for(int i = 0; i < 6; ++i)
{
// NOTE(review): the source elements are `float` (typically 4 bytes each) while
// `size` counts 2 bytes per channel, so this copies raw float bytes — and only
// part of each face — into a buffer that is later read as 16-bit values.
// Presumably this mismatch is the cause of the distortion; confirm.
memcpy( (char*) data + (size*i) , pixelsArray[i], static_cast<size_t>(size));
}
vkUnmapMemory(device, stagingBufferMemory);
VkImageCreateInfo imageInfo{};
...
imageInfo.arrayLayers = 6;
imageInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
imageInfo.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
VkImageViewCreateInfo viewInfo{};
...
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
viewInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
viewInfo.subresourceRange.layerCount = 6;
when VK_FORMAT_R8G8B8A8_UNORM :
when VK_FORMAT_R16G16B16A16_SFLOAT :
I fixed the problem. The cause was that I wanted to use half floats but was passing 32-bit floats to memcpy. I searched for how to produce half floats and found a solution that needs no extra library.
I added these helper functions:
typedef unsigned int uint;
typedef unsigned short ushort;
/// Returns the object representation of `x` as an unsigned integer.
///
/// The bytes are copied with memcpy rather than read through a casted
/// pointer: dereferencing a `uint*` that really points at a `float` violates
/// the strict-aliasing rule and is undefined behaviour, whereas memcpy is the
/// well-defined way to reinterpret the bits.
uint as_uint(const float x)
{
    static_assert(sizeof(uint) == sizeof(float), "as_uint requires float and uint to have the same size");
    uint bits;
    memcpy(&bits, &x, sizeof bits);
    return bits;
}
// Converts a 32-bit float to a 16-bit half-float bit pattern (1 sign bit,
// 5 exponent bits, 10 mantissa bits) without branching.
//
// The result is the OR of four terms, each gated by a comparison on the
// biased float exponent `e`:
//   - the sign bit moved from bit 31 to bit 15;
//   - a normalized encoding when e > 112;
//   - a denormalized encoding when 101 < e < 113;
//   - 0x7FFF ORed in when e > 143 (saturation).
// When e <= 101 none of the last three terms is active, so only the sign bit
// is returned.
ushort float_to_half(const float x)
{
// IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15, +-131008.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits
const uint b = as_uint(x)+0x00001000; // round-to-nearest-even: add last bit after truncated mantissa
const uint e = (b&0x7F800000)>>23; // exponent
const uint m = b&0x007FFFFF; // mantissa; in line below: 0x007FF000 = 0x00800000-0x00001000 = decimal indicator flag - initial rounding
// The uint expression is narrowed to ushort by the return.
return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate
}
and fixed the problem using these helper functions:
// Number of channel values per face (width * height * 4 channels). This is an
// ELEMENT count, unlike `allSize` / `size` in the question, which are byte counts.
// Widen before multiplying so large faces cannot overflow int arithmetic.
VkDeviceSize size_2 = static_cast<VkDeviceSize>(texWidth) * texHeight * 4;
// One half-float buffer per cubemap face, filled from the float data returned
// by stbi_loadf. The buffers are allocated with new[]; the caller must
// delete[] them (as ushort*) once they have been copied to the staging buffer.
void* half_pixelsArray[6];
for (int i = 0; i < 6; ++i)
{
    ushort* halfFace = new ushort[size_2];
    // Unsigned index of the same type as size_2 avoids a signed/unsigned comparison.
    for (VkDeviceSize j = 0; j < size_2; ++j)
    {
        halfFace[j] = float_to_half( pixelsArray[i][j] );
    }
    half_pixelsArray[i] = halfFace;
}
// and change float to half float in memcpy
// NOTE(review): `i` and `layerSize` are not declared in this snippet. Presumably
// this line replaces the memcpy inside the per-face loop of the question, and
// `layerSize` is the per-face byte count (element count * sizeof(ushort)) — confirm.
memcpy( (char*) data + (layerSize*i) , half_pixelsArray[i], static_cast<size_t>(layerSize));
I made a voxel raycaster in Unity using a compute shader and a texture. But at 1080p, it is limited to a view distance of only 100 at 30 fps. With no light bounces yet or anything, I am quite disappointed with this performance.
I tried learning Vulkan, but the best tutorials are based on rasterization, and I guess all I really want to do is compute pixels in parallel on the GPU. I am familiar with CUDA, and I've read that it is sometimes used for rendering. Or is there a simple way of just computing pixels in parallel in Vulkan? I've already got a template Vulkan project that opens a blank window. I don't need to get any data back from the GPU; I just want to render straight to the screen after giving it data.
And with the code below would it be significantly faster in Vulkan as opposed to a Unity compute shader? It has A LOT of if/else statements in it which I have read is bad for GPUs but I can't think of any other way of writing it.
EDIT: I optimized it as much as I could but it's still pretty slow, like 30 fps at 1080p.
Here is the compute shader:
#pragma kernel CSMain
// Inputs and output of the voxel ray-casting kernel CSMain.
// Each thread writes one texel of Result, addressed by its dispatch id.
RWTexture2D<float4> Result; // the actual array of pixels the player sees
const float width; // in pixels; also the row stride used to index rayDirections
const float height; // not referenced by CSMain in this listing
const StructuredBuffer<int> voxelMaterials; // for now just getting a flat voxel array
const int voxelBufferRowSize; // z stride in the flat index, and the bound checked on all three axes
const int voxelBufferPlaneSize; // y stride in the flat index
const int voxelBufferSize; // not referenced by CSMain in this listing
const StructuredBuffer<float3> rayDirections; // I'm now actually using it as points instead of directions
const float maxRayDistance; // traversal stops once distanceTraveled reaches this value
const float3 playerCameraPosition; // relative to the voxelData, ie the first voxel's bottom, back, left corner position, no negative coordinates
// Basis vectors used to turn the per-pixel point into a world-space direction.
const float3 playerWorldForward;
const float3 playerWorldRight;
const float3 playerWorldUp;
// Casts one ray per thread through the voxel grid and writes the colour of
// the first non-empty voxel it meets (dimmed by distance) into Result[id.xy].
// The pixel stays black when nothing is hit before maxRayDistance or the ray
// leaves the grid.
[numthreads(8,8,1)]
void CSMain (uint3 id : SV_DispatchThreadID)
{
Result[id.xy] = float4(0, 0, 0, 0); // setting the pixel to black by default
float3 pointHolder = playerCameraPosition; // initializing the first point to the player's position
const float3 p = rayDirections[id.x + (id.y * width)]; // vector transformation getting the world space directions of the rays relative to the player
const float3 u1 = p.x * playerWorldRight;
const float3 u2 = p.y * playerWorldUp;
const float3 u3 = p.z * playerWorldForward;
const float3 direction = u1 + u2 + u3; // the direction to that point
float distanceTraveled = 0;
int3 directionAxes; // 1 for positive, 0 for zero, -1 for negative
int3 directionIfReplacements = { 0, 0, 0 }; // 1 when the direction component is positive, 0 otherwise
float3 axesUnit = { 1 / abs(direction.x), 1 / abs(direction.y), 1 / abs(direction.z) }; // ray distance needed to advance one whole unit along each axis
float3 distancesXYZ = { 1000, 1000, 1000 };
int face = 0; // 1 = x, 2 = y, 3 = z // the current face the while loop point is on
// comparing the floats once in the beginning so the rest of the ray traversal can compare ints
if (direction.x > 0) {
directionAxes.x = 1;
directionIfReplacements.x = 1;
}
else if (direction.x < 0) {
directionAxes.x = -1;
}
else {
distanceTraveled = maxRayDistance; // just ending the ray for now if one of it's direction axes is exactly 0. You'll see a line of black pixels if the player's rotation is zero but this never happens naturally
directionAxes.x = 0;
}
if (direction.y > 0) {
directionAxes.y = 1;
directionIfReplacements.y = 1;
}
else if (direction.y < 0) {
directionAxes.y = -1;
}
else {
distanceTraveled = maxRayDistance;
directionAxes.y = 0;
}
if (direction.z > 0) {
directionAxes.z = 1;
directionIfReplacements.z = 1;
}
else if (direction.z < 0) {
directionAxes.z = -1;
}
else {
distanceTraveled = maxRayDistance;
directionAxes.z = 0;
}
// calculating the first point
// (the voxel containing the camera itself; only sampled when the camera is inside the grid)
if (playerCameraPosition.x < voxelBufferRowSize &&
playerCameraPosition.x >= 0 &&
playerCameraPosition.y < voxelBufferRowSize &&
playerCameraPosition.y >= 0 &&
playerCameraPosition.z < voxelBufferRowSize &&
playerCameraPosition.z >= 0)
{
int voxelIndex = floor(playerCameraPosition.x) + (floor(playerCameraPosition.z) * voxelBufferRowSize) + (floor(playerCameraPosition.y) * voxelBufferPlaneSize); // the voxel index in the flat array
// Materials 1, 2 and 3 map to red, green and blue; any other value leaves the pixel untouched.
switch (voxelMaterials[voxelIndex]) {
case 1:
Result[id.xy] = float4(1, 0, 0, 0);
distanceTraveled = maxRayDistance; // to end the while loop
break;
case 2:
Result[id.xy] = float4(0, 1, 0, 0);
distanceTraveled = maxRayDistance;
break;
case 3:
Result[id.xy] = float4(0, 0, 1, 0);
distanceTraveled = maxRayDistance;
break;
default:
break;
}
}
// traversing the ray beyond the first point
while (distanceTraveled < maxRayDistance)
{
// Distance along the ray to the next grid plane on each axis. For the axis
// whose face the point currently sits on, the precomputed whole-unit distance
// is used instead of the floor() expression.
switch (face) {
case 1:
distancesXYZ.x = axesUnit.x;
distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
break;
case 2:
distancesXYZ.y = axesUnit.y;
distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
break;
case 3:
distancesXYZ.z = axesUnit.z;
distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
break;
default:
distancesXYZ.x = (floor(pointHolder.x + directionIfReplacements.x) - pointHolder.x) / direction.x;
distancesXYZ.y = (floor(pointHolder.y + directionIfReplacements.y) - pointHolder.y) / direction.y;
distancesXYZ.z = (floor(pointHolder.z + directionIfReplacements.z) - pointHolder.z) / direction.z;
break;
}
// Pick the nearest plane; `face` records which axis it belongs to.
face = 0; // 1 = x, 2 = y, 3 = z
float smallestDistance = 1000;
if (distancesXYZ.x < smallestDistance) {
smallestDistance = distancesXYZ.x;
face = 1;
}
if (distancesXYZ.y < smallestDistance) {
smallestDistance = distancesXYZ.y;
face = 2;
}
if (distancesXYZ.z < smallestDistance) {
smallestDistance = distancesXYZ.z;
face = 3;
}
// A zero step would leave pointHolder and distanceTraveled unchanged, so stop here.
if (smallestDistance == 0) {
break;
}
int3 facesIfReplacement = { 1, 1, 1 };
switch (face) { // directionIfReplacements is positive if positive but I want to subtract so invert it to subtract 1 when negative subtract nothing when positive
case 1:
facesIfReplacement.x = 1 - directionIfReplacements.x;
break;
case 2:
facesIfReplacement.y = 1 - directionIfReplacements.y;
break;
case 3:
facesIfReplacement.z = 1 - directionIfReplacements.z;
break;
}
pointHolder += direction * smallestDistance; // the acual ray marching
distanceTraveled += smallestDistance;
int3 voxelIndexXYZ = { -1,-1,-1 }; // the integer coordinates within the buffer
voxelIndexXYZ.x = ceil(pointHolder.x - facesIfReplacement.x);
voxelIndexXYZ.y = ceil(pointHolder.y - facesIfReplacement.y);
voxelIndexXYZ.z = ceil(pointHolder.z - facesIfReplacement.z);
//check if voxelIndexXYZ is within bounds of the voxel buffer before indexing the array
if (voxelIndexXYZ.x < voxelBufferRowSize &&
voxelIndexXYZ.x >= 0 &&
voxelIndexXYZ.y < voxelBufferRowSize &&
voxelIndexXYZ.y >= 0 &&
voxelIndexXYZ.z < voxelBufferRowSize &&
voxelIndexXYZ.z >= 0)
{
int voxelIndex = voxelIndexXYZ.x + (voxelIndexXYZ.z * voxelBufferRowSize) + (voxelIndexXYZ.y * voxelBufferPlaneSize); // the voxel index in the flat array
// On a hit the colour is scaled by (1 - distanceTraveled / maxRayDistance), so farther voxels are darker.
switch (voxelMaterials[voxelIndex]) {
case 1:
Result[id.xy] = float4(1, 0, 0, 0) * (1 - (distanceTraveled / maxRayDistance));
distanceTraveled = maxRayDistance; // to end the while loop
break;
case 2:
Result[id.xy] = float4(0, 1, 0, 0) * (1 - (distanceTraveled / maxRayDistance));
distanceTraveled = maxRayDistance;
break;
case 3:
Result[id.xy] = float4(0, 0, 1, 0) * (1 - (distanceTraveled / maxRayDistance));
distanceTraveled = maxRayDistance;
break;
}
}
else {
break; // should be uncommented in actual game implementation where the player will always be inside the voxel buffer
}
}
}
Depending on the voxel data you give it it produces this:
And here is the shader after "optimizing" it and taking out all branching or diverging conditional statements (I think):
#pragma kernel CSMain
// Inputs and output of the branch-reduced voxel ray-casting kernel CSMain.
RWTexture2D<float4> Result; // the actual array of pixels the player sees
// Colour accumulated by CSMain and written to Result at the end of the kernel.
// NOTE(review): declared at global scope but assigned per thread inside CSMain;
// confirm the compiler treats it as per-thread storage (a local variable would
// make that explicit).
float4 resultHolder;
const float width; // in pixels; also the row stride used to index rayDirections
const float height; // not referenced by CSMain in this listing
const Buffer<int> voxelMaterials; // for now just getting a flat voxel array
const Buffer<float4> voxelColors; // colour lookup indexed by the material value
const int voxelBufferRowSize; // z stride in the flat index, and the bound checked on all three axes
const int voxelBufferPlaneSize; // y stride in the flat index
const int voxelBufferSize; // not referenced by CSMain in this listing
const Buffer<float3> rayDirections; // I'm now actually using it as points instead of directions
const float maxRayDistance; // traversal stops once distanceTraveled reaches this value
const float3 playerCameraPosition; // relative to the voxelData, ie the first voxel's bottom, back, left corner position, no negative coordinates
// Basis vectors used to turn the per-pixel point into a world-space direction.
const float3 playerWorldForward;
const float3 playerWorldRight;
const float3 playerWorldUp;
// Branch-reduced version of the voxel ray caster: one ray per thread, stepping
// from grid plane to grid plane until a voxel with material > 0 is found, the
// ray leaves the grid, or maxRayDistance is reached. Conditionals are replaced
// by multiplying with boolean values where possible.
[numthreads(16, 16, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
resultHolder = float4(0, 0, 0, 0); // setting the pixel to black by default
float3 pointHolder = playerCameraPosition; // initializing the first point to the player's position
const float3 p = rayDirections[id.x + (id.y * width)]; // vector transformation getting the world space directions of the rays relative to the player
const float3 u1 = p.x * playerWorldRight;
const float3 u2 = p.y * playerWorldUp;
const float3 u3 = p.z * playerWorldForward;
const float3 direction = u1 + u2 + u3; // the transformed ray direction in world space
const bool anyDir0 = direction.x == 0 || direction.y == 0 || direction.z == 0; // preventing a division by zero
// When any component is zero the ray starts at maxRayDistance, so the loop below never runs.
float distanceTraveled = maxRayDistance * anyDir0;
const float3 nonZeroDirection = { // to prevent a division by zero
direction.x + (1 * anyDir0),
direction.y + (1 * anyDir0),
direction.z + (1 * anyDir0)
};
const float3 axesUnits = { // the distances if the axis is an integer
1.0f / abs(nonZeroDirection.x),
1.0f / abs(nonZeroDirection.y),
1.0f / abs(nonZeroDirection.z)
};
const bool3 isDirectionPositiveOr0 = {
direction.x >= 0,
direction.y >= 0,
direction.z >= 0
};
while (distanceTraveled < maxRayDistance)
{
// True for each axis on which the current point lies exactly on a grid plane.
const bool3 pointIsAnInteger = {
(int)pointHolder.x == pointHolder.x,
(int)pointHolder.y == pointHolder.y,
(int)pointHolder.z == pointHolder.z
};
// Distance along the ray to the next grid plane per axis: the floor() term is
// kept when the coordinate is not on a plane, axesUnits when it is.
const float3 distancesXYZ = {
((floor(pointHolder.x + isDirectionPositiveOr0.x) - pointHolder.x) / direction.x * !pointIsAnInteger.x) + (axesUnits.x * pointIsAnInteger.x),
((floor(pointHolder.y + isDirectionPositiveOr0.y) - pointHolder.y) / direction.y * !pointIsAnInteger.y) + (axesUnits.y * pointIsAnInteger.y),
((floor(pointHolder.z + isDirectionPositiveOr0.z) - pointHolder.z) / direction.z * !pointIsAnInteger.z) + (axesUnits.z * pointIsAnInteger.z)
};
float smallestDistance = min(distancesXYZ.x, distancesXYZ.y);
smallestDistance = min(smallestDistance, distancesXYZ.z);
pointHolder += direction * smallestDistance;
distanceTraveled += smallestDistance;
// Voxel entered by the step: floor of the point, minus one on an axis where
// the ray travels in the negative direction and the point sits exactly on a plane.
const int3 voxelIndexXYZ = {
floor(pointHolder.x) - (!isDirectionPositiveOr0.x && (int)pointHolder.x == pointHolder.x),
floor(pointHolder.y) - (!isDirectionPositiveOr0.y && (int)pointHolder.y == pointHolder.y),
floor(pointHolder.z) - (!isDirectionPositiveOr0.z && (int)pointHolder.z == pointHolder.z)
};
const bool inBounds = (voxelIndexXYZ.x < voxelBufferRowSize && voxelIndexXYZ.x >= 0) && (voxelIndexXYZ.y < voxelBufferRowSize && voxelIndexXYZ.y >= 0) && (voxelIndexXYZ.z < voxelBufferRowSize && voxelIndexXYZ.z >= 0);
const int voxelIndexFlat = (voxelIndexXYZ.x + (voxelIndexXYZ.z * voxelBufferRowSize) + (voxelIndexXYZ.y * voxelBufferPlaneSize)) * inBounds; // meaning the voxel on 0,0,0 will always be empty and act as a our index out of range prevention
// Hit: look the colour up by material and dim it with distance.
if (voxelMaterials[voxelIndexFlat] > 0) {
resultHolder = voxelColors[voxelMaterials[voxelIndexFlat]] * (1 - (distanceTraveled / maxRayDistance));
break;
}
if (!inBounds) break;
}
Result[id.xy] = resultHolder;
}
A compute shader is what it is: a program that runs on the GPU, whether through Vulkan or Unity, so you are doing the work in parallel either way. The point of Vulkan, however, is that it gives you more control over the commands being executed on the GPU — synchronization, memory, etc. So it's not necessarily going to be faster in Vulkan than in Unity. What you should do is actually optimise your shaders.
Also, the main problem with if/else is divergence within groups of invocations that operate in lock-step. If you can avoid that divergence, the performance impact will be far smaller. These may help you with that.
If you still want to do all that in vulkan...
Since you are not going to do any triangle rasterisation, you probably won't need the render passes or graphics pipelines that the tutorials generally show. Instead you are going to need a compute pipeline. Those are far simpler than graphics pipelines, requiring only one shader and the pipeline layout (the inputs and outputs are bound via descriptor sets).
You just need to pass the swapchain image to the compute shader as a storage image in a descriptor (and of course any other data your shader may need, all are passed via descriptors). For that you need to specify VK_IMAGE_USAGE_STORAGE_BIT in your swapchain creation structure.
Then, in your command buffer you bind the descriptor sets with image and other data, bind the compute pipeline, and dispatch it as you probably do in Unity. The swapchain presentation and submitting the command buffers shouldn't be different than how the graphics works in the tutorials.
I am working on an Android project that uses vudroid, which in turn uses MuPDF version 0.5.
Vudroid removed MuPDF's original OpenJPEG support, so I have ported the OpenJPEG support from MuPDF version 1.5.
But I have encountered a new problem: the color information in JPX images is gone. The desired result:
my effect:
the ported load-jpx code:
#include "fitz.h"
#include "mupdf.h"
/* Without the definition of OPJ_STATIC, compilation fails on windows
* due to the use of __stdcall. We believe it is required on some
* linux toolchains too. */
#define OPJ_STATIC
#ifndef _MSC_VER
#define OPJ_HAVE_STDINT_H
#endif
#include <openjpeg.h>
/* OpenJPEG error handler. In this port it does nothing, so decoder error
 * messages are discarded; the commented-out lines show the context-based
 * reporting that is disabled here. */
static void fz_opj_error_callback(const char *msg, void *client_data)
{
//fz_context *ctx = (fz_context *)client_data;
//fz_warn(ctx, "openjpeg error: %s", msg);
}
/* OpenJPEG warning handler. In this port it does nothing, so decoder warnings
 * are discarded; the commented-out lines show the context-based reporting
 * that is disabled here. */
static void fz_opj_warning_callback(const char *msg, void *client_data)
{
//fz_context *ctx = (fz_context *)client_data;
//fz_warn(ctx, "openjpeg warning: %s", msg);
}
/* OpenJPEG informational-message handler; intentionally empty, so info
 * messages are dropped. */
static void fz_opj_info_callback(const char *msg, void *client_data)
{
/* fz_warn("openjpeg info: %s", msg); */
}
/* Cursor over an in-memory buffer; passed as user data to the OpenJPEG
 * stream callbacks below. */
typedef struct stream_block_s
{
unsigned char *data; /* start of the buffer */
int size; /* total number of bytes in data */
int pos; /* current read offset into data */
} stream_block;
/*
 * OpenJPEG read callback: copies up to p_nb_bytes from the in-memory block
 * into p_buffer and advances the block's cursor.
 *
 * Returns the number of bytes copied, or (OPJ_SIZE_T)-1 when no bytes remain.
 */
static OPJ_SIZE_T fz_opj_stream_read(void * p_buffer, OPJ_SIZE_T p_nb_bytes, void * p_user_data)
{
	stream_block *block = (stream_block *)p_user_data;
	int remaining = block->size - block->pos;
	int count;

	/* Nothing left (or the cursor is already past the end): report end of data. */
	if (remaining <= 0)
		return (OPJ_SIZE_T)-1;

	/* Serve at most what the caller asked for. */
	count = remaining;
	if ((OPJ_SIZE_T)count > p_nb_bytes)
		count = p_nb_bytes;

	memcpy(p_buffer, block->data + block->pos, count);
	block->pos += count;
	return count;
}
/*
 * OpenJPEG skip callback: advances the block's cursor by `skip` bytes,
 * clamped so the cursor does not move past the end of the data.
 *
 * Returns the cursor position after the skip.
 * NOTE(review): this returns the new absolute position rather than the number
 * of bytes skipped; confirm which of the two OpenJPEG's skip-function contract
 * expects.
 */
static OPJ_OFF_T fz_opj_stream_skip(OPJ_OFF_T skip, void * p_user_data)
{
	stream_block *block = (stream_block *)p_user_data;
	int remaining = block->size - block->pos;

	if (skip > remaining)
		skip = remaining;
	block->pos += skip;
	return block->pos;
}
/*
 * OpenJPEG seek callback: moves the block's cursor to the absolute offset
 * seek_pos.
 *
 * Returns OPJ_TRUE on success and OPJ_FALSE, leaving the cursor untouched,
 * when seek_pos lies outside [0, size]. Negative offsets are rejected as well:
 * previously they were stored as the cursor, which made the next read index
 * before the start of the buffer.
 */
static OPJ_BOOL fz_opj_stream_seek(OPJ_OFF_T seek_pos, void * p_user_data)
{
	stream_block *sb = (stream_block *)p_user_data;

	if (seek_pos < 0 || seek_pos > sb->size)
		return OPJ_FALSE;
	sb->pos = seek_pos;
	return OPJ_TRUE;
}
/*
 * Decode a JPEG 2000 image held in memory and store its samples in
 * img->samples.
 *
 *   img     - image whose `samples` buffer is allocated and filled here
 *   data    - encoded image bytes
 *   size    - number of bytes in data
 *   defcs   - colourspace from the PDF dictionary, or NULL; used only when its
 *             component count matches the decoded image
 *   indexed - non-zero to make the decoder ignore palette / component-mapping
 *             boxes (OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG)
 *
 * The data is treated as a bare J2K codestream when it starts with the bytes
 * FF 4F, and as a JP2 container otherwise. Samples are written 8 bits per
 * component, interleaved per pixel, colour components first; when the image
 * has no alpha component a constant 255 byte is appended to every pixel.
 *
 * Returns fz_okay on success. On failure every OpenJPEG object created so far
 * is released and the value of fz_throw() is returned.
 * NOTE(review): this relies on fz_throw() yielding an fz_error value, as the
 * fz_error / fz_okay return convention of this function suggests; the earlier
 * code discarded that value and kept executing after a failure.
 * NOTE(review): alpha/padding is written as the LAST byte of each pixel;
 * confirm that matches the component order the rest of this MuPDF version
 * expects. The premultiply / CMYK-to-RGB post-processing of the newer code has
 * not been ported.
 */
fz_error
fz_load_jpx(pdf_image* img, unsigned char *data, int size, fz_colorspace *defcs, int indexed)
{
	opj_dparameters_t params;
	opj_codec_t *codec;
	opj_image_t *jpx = NULL;
	opj_stream_t *stream;
	fz_colorspace *colorspace = NULL;
	unsigned char *p;
	OPJ_CODEC_FORMAT format;
	int a, n, w, h, depth, sgnd;
	int x, y, k, v;
	int bpc;
	stream_block sb;

	if (size < 2)
		return fz_throw("not enough data to determine image format");

	/* Check for SOC marker -- if found we have a bare J2K stream */
	if (data[0] == 0xFF && data[1] == 0x4F)
		format = OPJ_CODEC_J2K;
	else
		format = OPJ_CODEC_JP2;

	opj_set_default_decoder_parameters(&params);
	if (indexed)
		params.flags |= OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG;

	codec = opj_create_decompress(format);
	opj_set_info_handler(codec, fz_opj_info_callback, 0);
	opj_set_warning_handler(codec, fz_opj_warning_callback, 0);
	opj_set_error_handler(codec, fz_opj_error_callback, 0);
	if (!opj_setup_decoder(codec, &params))
	{
		opj_destroy_codec(codec);
		return fz_throw("j2k decode failed");
	}

	/* Feed the decoder from memory through the stream_block callbacks. */
	stream = opj_stream_default_create(OPJ_TRUE);
	sb.data = data;
	sb.pos = 0;
	sb.size = size;
	opj_stream_set_read_function(stream, fz_opj_stream_read);
	opj_stream_set_skip_function(stream, fz_opj_stream_skip);
	opj_stream_set_seek_function(stream, fz_opj_stream_seek);
	opj_stream_set_user_data(stream, &sb);
	/* Set the length to avoid an assert */
	opj_stream_set_user_data_length(stream, size);

	if (!opj_read_header(stream, codec, &jpx))
	{
		opj_stream_destroy(stream);
		opj_destroy_codec(codec);
		return fz_throw("Failed to read JPX header");
	}

	if (!opj_decode(codec, stream, jpx))
	{
		opj_stream_destroy(stream);
		opj_destroy_codec(codec);
		opj_image_destroy(jpx);
		return fz_throw("Failed to decode JPX image");
	}

	opj_stream_destroy(stream);
	opj_destroy_codec(codec);

	/* jpx should never be NULL here, but check anyway */
	if (!jpx)
		return fz_throw("opj_decode failed");
	pdf_logimage("opj_decode succeeded");

	/* Every component must carry data and agree with component 0 in size and
	 * precision. Starting at 0 also verifies that component 0 has data. */
	for (k = 0; k < (int)jpx->numcomps; k++)
	{
		if (!jpx->comps[k].data)
		{
			opj_image_destroy(jpx);
			return fz_throw("image components are missing data");
		}
		if (jpx->comps[k].w != jpx->comps[0].w)
		{
			opj_image_destroy(jpx);
			return fz_throw("image components have different width");
		}
		if (jpx->comps[k].h != jpx->comps[0].h)
		{
			opj_image_destroy(jpx);
			return fz_throw("image components have different height");
		}
		if (jpx->comps[k].prec != jpx->comps[0].prec)
		{
			opj_image_destroy(jpx);
			return fz_throw("image components have different precision");
		}
	}

	n = jpx->numcomps;
	w = jpx->comps[0].w;
	h = jpx->comps[0].h;
	depth = jpx->comps[0].prec;
	sgnd = jpx->comps[0].sgnd;

	/* Split the component count into colour components (n) and alpha (a). */
	if (jpx->color_space == OPJ_CLRSPC_SRGB && n == 4) { n = 3; a = 1; }
	else if (jpx->color_space == OPJ_CLRSPC_SYCC && n == 4) { n = 3; a = 1; }
	else if (n == 2) { n = 1; a = 1; }
	else if (n > 4) { n = 4; a = 1; }
	else { a = 0; }

	/* Prefer the dictionary colourspace when its component count matches. */
	if (defcs)
	{
		if (defcs->n == n)
		{
			colorspace = defcs;
		}
		else
		{
			fz_warn("jpx file and dict colorspaces do not match");
			defcs = NULL;
		}
	}

	/* Otherwise fall back to a device colourspace chosen by component count. */
	if (!defcs)
	{
		switch (n)
		{
		case 1: colorspace = pdf_devicegray; break;
		case 3: colorspace = pdf_devicergb; break;
		case 4: colorspace = pdf_devicecmyk; break;
		}
	}
	pdf_logimage("colorspace handled\n");

	/* Bytes per pixel: colour components plus one alpha/padding byte. */
	bpc = 1;
	if (colorspace)
		bpc = 1 + colorspace->n;
	pdf_logimage("w = %d, bpc = %d, h = %d\n", w, bpc, h);

	img->samples = fz_newbuffer(w * bpc * h);
	if (!img->samples)
	{
		opj_image_destroy(jpx);
		return fz_throw("out of memory loading jpx");
	}

	p = (unsigned char*)img->samples->bp;
	pdf_logimage("start to deal with samples");
	for (y = 0; y < h; y++)
	{
		for (x = 0; x < w; x++)
		{
			for (k = 0; k < n + a; k++)
			{
				v = jpx->comps[k].data[y * w + x];
				/* Shift signed samples into the unsigned range. */
				if (sgnd)
					v = v + (1 << (depth - 1));
				/* Reduce deeper samples to 8 bits. */
				if (depth > 8)
					v = v >> (depth - 8);
				*p++ = v;
			}
			/* No alpha in the image: pad the pixel with a constant 255. */
			if (!a)
				*p++ = 255;
		}
	}
	img->samples->wp = p;
	pdf_logimage("start to deal with samples succeeded");

	opj_image_destroy(jpx);

	return fz_okay;
}
The render code:
/*
 * JNI entry point: renders a page and returns it as a byte array of packed
 * 3-byte pixels.
 *
 *   dochandle  - renderdocument_t pointer passed from Java as a jlong
 *   pagehandle - renderpage_t pointer passed from Java as a jlong
 *
 * The page is rendered with an identity transform into a fixed 595 x 841
 * view box. Each row of the result is padded to a multiple of 4 bytes
 * (bmpstride); for every pixel, bytes 3, 2 and 1 of the 4-byte source pixel
 * are written in that order. The converted data is also kept in the
 * file-level bmpdata / bmpstride variables and dumped to /sdcard/drawpage
 * when that file can be opened.
 *
 * Returns the new byte array, or NULL when rendering or an allocation fails.
 * A rendering failure additionally raises a Java exception through
 * throw_exception(). The render result is checked before the pixmap is
 * touched; the earlier code dereferenced it unconditionally.
 */
JNIEXPORT jbyteArray JNICALL Java_org_vudroid_pdfdroid_codec_PdfPage_drawPage
(JNIEnv *env, jclass clazz, jlong dochandle, jlong pagehandle)
{
	renderdocument_t *doc = (renderdocument_t*) dochandle;
	renderpage_t *page = (renderpage_t*) pagehandle;
	fz_error error;
	fz_matrix ctm;
	fz_irect viewbox;
	fz_pixmap *pixmap = nil;
	jbyteArray array;
	FILE *fp;
	int x, y, nbytes;

	/* initialize parameter arrays for MuPDF: identity matrix, fixed view box */
	ctm.a = 1;
	ctm.b = 0;
	ctm.c = 0;
	ctm.d = 1;
	ctm.e = 0;
	ctm.f = 0;
	viewbox.x0 = 0;
	viewbox.y0 = 0;
	viewbox.x1 = 595;
	viewbox.y1 = 841;

	/* do the rendering */
	DEBUG("doing the rendering...");
	error = fz_rendertree(&pixmap, doc->rast, page->page->tree,
		ctm, viewbox, 1);
	if (error || !pixmap) {
		if (pixmap)
			fz_free(pixmap);
		DEBUG("error!");
		throw_exception(env, "error rendering page");
		return NULL;
	}

	/* Replace the previous frame's conversion buffer. */
	if (bmpdata)
		fz_free(bmpdata);
	bmpstride = ((pixmap->w * 3 + 3) / 4) * 4;
	nbytes = pixmap->h * bmpstride;
	bmpdata = fz_malloc(nbytes);
	DEBUG("inside drawpage, bmpstride = %d, pixmap->w = %d, pixmap->h = %d\n", bmpstride, pixmap->w, pixmap->h);
	if (!bmpdata) {
		fz_free(pixmap);
		return NULL;
	}

	/* evil magic: we transform the rendered image's byte order */
	for (y = 0; y < pixmap->h; y++)
	{
		unsigned char *p = bmpdata + y * bmpstride;
		unsigned char *s = pixmap->samples + y * pixmap->w * 4;
		for (x = 0; x < pixmap->w; x++)
		{
			p[x * 3 + 0] = s[x * 4 + 3];
			p[x * 3 + 1] = s[x * 4 + 2];
			p[x * 3 + 2] = s[x * 4 + 1];
		}
	}

	/* Debug dump of the converted pixels; skipped when the file cannot be opened. */
	fp = fopen("/sdcard/drawpage", "wb");
	if (fp) {
		fwrite(bmpdata, nbytes, 1, fp);
		fclose(fp);
	}

	/* NewByteArray returns NULL when the array cannot be allocated. */
	array = (*env)->NewByteArray(env, nbytes);
	if (array)
		(*env)->SetByteArrayRegion(env, array, 0, nbytes, (const jbyte *)bmpdata);

	/* NOTE(review): the pixmap is released with fz_free as before; confirm
	 * that this also releases its sample buffer in this MuPDF version. */
	fz_free(pixmap);

	DEBUG("PdfView.drawPage() done");
	return array;
}
I have compared the JPX output samples with those produced by MuPDF 1.5 on Windows and they are the same, but the colorspace of the original JPX image has been lost.
Could you help me get the colorspace back?
It seems you are trying to use an old version of MuPDF with some bits pulled in from a more recent version. To be honest, that's hardly likely to work. I would also guess that it's not the OpenJPEG library causing your problem, since the image does appear, only converted to grayscale.
Have you tried opening the file in the current version of MuPDF ? Does it work ?
If so then it seems to me your correct approach should be to use the current code, not try and bolt pieces onto an older version.
So I've been working in Processing for a few weeks now, and, though I'm not experienced in programming, I have moved on to more complex projects. I'm programming an evolution simulator, that spawns creatures with random properties.
Eventually, I'll add reproduction, but as of now the creatures just sort of float around the screen, and follow the mouse somewhat. It interacts with sound from the line in, but I commented those parts out so that it can be viewed on the canvas, it shouldn't really change the question, I just thought I would point it out.
As of now, the framerate is far less than ideal for me, and it slowly lowers as more creatures are spawned. Am I making some fundamental mistake, or am I just running too many functions per frame?
Here's the source code, and you can play with it in the browser here:
//import ddf.minim.*;
//import ddf.minim.signals.*;
//import ddf.minim.analysis.*;
//import ddf.minim.effects.*;
//Minim minim;
//AudioInput in;
// Sketch state. Creatures are stored as parallel arrays indexed by creature
// number; slots 0..creatureIndex are in use.
boolean newCreature = true; // true when draw() is allowed to spawn another creature
boolean matured[]; // per creature: has it reached its end size?
int ellipses[]; // per creature: ellipse style, 0..3 (0 draws none)
int hair[]; // per creature: hair style, 0..3 (0 draws none)
int maxCreatureNumber = 75; // capacity of every per-creature array
//int volume;
//int volumeTolerance = 1;
int creatureIndex = -1; // index of the most recently spawned creature; -1 while there are none
int creatureX[];
int creatureY[];
float strokeWeightAttribute[];
float creatureSize[]; // current size, grown until it equals creatureEndSize
float creatureEndSize[]; // size at which the creature counts as matured
float creatureXIncrement[]; // allocated in setup(); not referenced elsewhere in this listing
float creatureYIncrement[]; // allocated in setup(); not referenced elsewhere in this listing
float bubbleSize; // growth applied to immature creatures each frame; increases by 0.01 per frame
float easing = 0.05; // fraction of the distance to the mouse used when moving matured creatures
float angle = 0.00; // advanced every frame; drives the pulsing stroke of immature creatures
color colorAttribute[];
// One-time initialisation: creates the 1000 x 500 canvas and allocates the
// per-creature arrays.
void setup() {
  // size() has to be the first call in setup(); anything drawn before it
  // (the background fill used to come first) is issued against the default
  // surface rather than the requested canvas.
  size(1000,500);
  background(0);
  noFill();
  //minim = new Minim(this);
  //in = minim.getLineIn(Minim.STEREO, 512);
  creatureX = new int[maxCreatureNumber];
  creatureY = new int[maxCreatureNumber];
  ellipses = new int[maxCreatureNumber];
  hair = new int[maxCreatureNumber];
  strokeWeightAttribute = new float[maxCreatureNumber];
  creatureEndSize = new float[maxCreatureNumber];
  creatureSize = new float[maxCreatureNumber];
  creatureXIncrement = new float[maxCreatureNumber];
  creatureYIncrement = new float[maxCreatureNumber];
  matured = new boolean[maxCreatureNumber];
  colorAttribute = new color[maxCreatureNumber];
}
// Per-frame step: fade the previous frame, spawn a creature when one is due
// and there is room, then update and draw every creature.
// (The line-in volume handling of the original sketch is disabled; bubbleSize
// simply grows by a fixed amount each frame instead.)
void draw() {
  angle += 0.05;

  // A translucent black rectangle over the whole canvas leaves fading trails.
  fill(0, 50);
  rect(-1, -1, 1001, 501);

  boolean hasFreeSlot = creatureIndex < maxCreatureNumber - 1;
  if (newCreature && hasFreeSlot) {
    initSpontaneousCreature();
  }

  updateCreatures();
  bubbleSize += 0.01;
}
//void stop() {
// minim.stop();
// super.stop();
//}
// Spawns one creature in the next free slot with random size, position,
// shape, stroke weight and colour, and clears newCreature so that no further
// creature is spawned until this one has matured.
void initSpontaneousCreature() {
  creatureIndex++;
  final int slot = creatureIndex;

  creatureEndSize[slot] = int(random(5, 20));

  // Random position, nudged towards the canvas centre by the end size.
  creatureX[slot] = int(random(1000));
  if (creatureX[slot] < 500) {
    creatureX[slot] += creatureEndSize[slot];
  }
  else {
    creatureX[slot] -= creatureEndSize[slot];
  }
  creatureY[slot] = int(random(500));
  if (creatureY[slot] < 250) {
    creatureY[slot] += creatureEndSize[slot];
  }
  else {
    creatureY[slot] -= creatureEndSize[slot];
  }

  ellipses[slot] = int(random(4));
  hair[slot] = int(random(4));
  strokeWeightAttribute[slot] = random(1, 4);
  colorAttribute[slot] = color(int(random(20,255)), int(random(20,255)), int(random(20,255)));
  matured[slot] = false;
  newCreature = false;

  // A creature with neither ellipses nor hair would be invisible: re-roll both.
  while (ellipses[slot] == 0 && hair[slot] == 0) {
    ellipses[slot] = int(random(4));
    hair[slot] = int(random(4));
  }
}
// Advances every creature by one frame and then draws them all.
//   - Matured creatures drift towards the mouse with some random jitter.
//   - Immature creatures grow by bubbleSize until they reach their end size;
//     on the following frame they are marked matured and spawning is
//     re-enabled.
// drawCreature() already loops over all creatures, so it is called once per
// frame here. Calling it inside the loop, as before, redrew every creature
// once per creature (quadratic work per frame), which is what made the frame
// rate drop as the population grew.
void updateCreatures() {
  for(int n = 0; n <= creatureIndex; n++) {
    if(matured[n]) {
      creatureX[n] += (((mouseX - creatureX[n]) * easing) / 60) + random(-5, 6);
      creatureY[n] += (((mouseY - creatureY[n]) * easing) / 60) + random(-5, 6);
    }
    else if(creatureEndSize[n] != creatureSize[n]) {
      // Clamp with min() so the size lands on creatureEndSize exactly; the
      // maturity test above is an equality comparison on floats.
      creatureSize[n] = min(creatureSize[n] + bubbleSize, creatureEndSize[n]);
    }
    else {
      newCreature = true;
      matured[n] = true;
    }
  }
  drawCreature();
}
// Draws every creature in slots 0..creatureIndex.
// Matured creatures get their ellipses and hair; immature ones are shown as a
// single pulsing circle five times their current size.
void drawCreature() {
  for (int n = 0; n <= creatureIndex; n++) {
    if (matured[n]) {
      stroke(colorAttribute[n]);
      strokeWeight(strokeWeightAttribute[n]);
      // The feature with the lower style number is drawn first, ellipses
      // winning ties. Order matters because drawing hair changes the stroke.
      if (hair[n] < ellipses[n]) {
        drawCreatureHair(n);
        drawCreatureEllipses(n);
      }
      else {
        drawCreatureEllipses(n);
        drawCreatureHair(n);
      }
    }
    else {
      stroke(abs(sin(angle) * 255));
      //strokeWeight(5);
      ellipse(creatureX[n], creatureY[n], creatureSize[n] * 5, creatureSize[n] * 5);
      noStroke();
    }
  }
}

// Draws the ellipses of creature n for ellipse styles 1..3; other styles draw
// nothing. Every style starts with one ellipse and adds rotated copies:
// style 1 rotates by 180; style 2 by 180 then 270; style 3 by 90, 180, 270.
void drawCreatureEllipses(int n) {
  int style = ellipses[n];
  if (style < 1 || style > 3) {
    return;
  }
  float s = creatureSize[n];
  pushMatrix();
  translate(creatureX[n], creatureY[n]);
  ellipse(s, s, s, s);
  if (style == 3) {
    rotate(radians(90));
    ellipse(s, s, s, s);
  }
  rotate(radians(180));
  ellipse(s, s, s, s);
  if (style >= 2) {
    rotate(radians(270));
    ellipse(s, s, s, s);
  }
  popMatrix();
}

// Draws the hair of creature n for hair styles 1..3; other styles draw
// nothing. The style only selects the loop step (70, 30 or 1), i.e. how many
// randomly faded lines are drawn from the creature's centre.
void drawCreatureHair(int n) {
  int step;
  switch (hair[n]) {
    case 1: step = 70; break;
    case 2: step = 30; break;
    case 3: step = 1; break;
    default: return;
  }
  pushMatrix();
  translate(creatureX[n], creatureY[n]);
  for (int j = 0; j <= 360; j += step) {
    rotate(j);
    stroke(colorAttribute[n], random(255));
    line(0,0, creatureSize[n] + random(10), creatureSize[n] + random(10));
  }
  popMatrix();
}
Right, as I suspected, all the unnecessary pushMatrix()/popMatrix() calls and the sheer number of lines seemed to be the main culprits. On top of that, there was a lot of redundant code.
I simply refactored the code in a cleaner manner and it seems to run fine.
Here is my 'improved' version:
// Upper bound on the number of creatures the sketch will ever hold.
int maxCreatures = 75;
// How many slots of `creatures` have been filled so far.
int numCreatures = 0;
int spawnNthFrame = 50;//spawn a creature every 50 frames
// Fixed-size pool; only indices [0, numCreatures) contain creatures.
Creature[] creatures;

/**
 * One-time sketch initialisation: creates the 1000x500 canvas, paints it
 * black, disables shape fill and allocates the creature pool.
 */
void setup() {
  // size() has to be the first statement inside setup(); anything drawn
  // before it (such as the background) is not guaranteed to take effect.
  size(1000,500);
  background(0);
  noFill();
  creatures = new Creature[maxCreatures];
}
/**
 * Per-frame callback. Paints a translucent black rectangle over the whole
 * canvas (instead of clearing it), spawns one new creature on every
 * spawnNthFrame-th frame while the pool has room, then updates every
 * creature spawned so far.
 */
void draw() {
  // Alpha 50 black overlay; the rectangle extends one pixel past each edge.
  fill(0, 50);
  rect(-1, -1, 1001, 501);

  boolean isSpawnFrame = (frameCount % spawnNthFrame == 0);
  if (isSpawnFrame) {
    println("creatures: " + numCreatures);
    boolean poolHasRoom = (numCreatures < maxCreatures);
    if (poolHasRoom) {
      // Draw the random attributes in constructor-argument order:
      // endSize, x, y, ellipses, hair, strokeW, colour.
      float spawnEndSize = random(5, 20);
      int spawnX = int(random(1000));
      int spawnY = int(random(500));
      int ellipseKind = int(random(4));
      int hairKind = int(random(4));
      float spawnStroke = random(1, 4);
      color spawnColor = color(int(random(20,255)), int(random(20,255)), int(random(20,255)));

      creatures[numCreatures] = new Creature(spawnEndSize, spawnX, spawnY, ellipseKind, hairKind, spawnStroke, spawnColor);
      numCreatures++;
    }
  }

  // Only the first numCreatures slots are populated.
  int idx = 0;
  while (idx < numCreatures) {
    creatures[idx].update();
    idx++;
  }
}
and the Creature class:
/**
 * A single creature. It first grows as a pulsing grey circle; once its size
 * reaches cEndSize it is "matured" and is drawn with its ellipse/hair
 * decoration while drifting towards the mouse with random jitter.
 */
class Creature{
  // Position. cXInc/cYInc are declared but not used anywhere in this class.
  int x,y,cXInc,cYInc;//if x,y are ints, increments would be ints, right?
  float cStrokeWeight,cSize,cEndSize,cSizeInc = 0.01,easing = 0.05,angle = 0.00;
  color cColor;
  // hair / ellipses select the decoration variant; numHair is derived from hair.
  int hair,numHair,ellipses;
  boolean matured = false;

  /**
   * @param endSize  size at which the creature becomes matured
   * @param x        spawn x position
   * @param y        spawn y position
   * @param ellipses ellipse decoration variant (1..3 draw something)
   * @param hair     hair decoration variant (1..3 draw something)
   * @param strokeW  stroke weight used once matured
   * @param c        stroke colour used once matured
   */
  Creature(float endSize,int x, int y,int ellipses,int hair,float strokeW,color c){
    cEndSize = endSize;
    // Nudge the spawn point towards the middle of the 1000x500 canvas by the
    // final size. The adjustment must be applied to the fields (and to y for
    // the vertical case); adjusting the parameters after copying them has no
    // effect.
    this.x = x;
    if(this.x >= 500) this.x -= cEndSize;
    else this.x += cEndSize;
    this.y = y;
    if(this.y >= 250) this.y -= cEndSize;
    else this.y += cEndSize;
    this.ellipses = ellipses;
    this.hair = hair;
    // Hair lines are drawn through the centre, so each one counts as two radii.
    if(hair == 1) numHair = 3;//~5, half that, draw through centre, etc.
    if(hair == 2) numHair = 6;
    if(hair == 3) numHair = 30;//no default value: numHair stays 0 for any other hair
    cStrokeWeight = strokeW;
    this.cColor = c;
  }

  /** Advances the creature by one frame and then draws it. */
  void update(){
    if(matured) {
      // Ease towards the mouse and add random jitter on each axis.
      x += (((mouseX - x) * easing) / 60) + random(-5, 6);
      y += (((mouseY - y) * easing) / 60) + random(-5, 6);
    }else {
      // Grow until the target size is reached, then flip to matured.
      if(cSize < cEndSize) cSize += cSizeInc;
      else matured = true;
      angle += 0.05;
    }
    this.draw();
  }

  /** Renders the creature in its current state. */
  void draw(){
    if(matured){
      stroke(cColor);
      strokeWeight(cStrokeWeight);
      if(ellipses == 1){//2 ellipses diagonally
        ellipse(x,y,cSize,cSize);
        ellipse(x+cSize,y+cSize,cSize,cSize);
      }
      if(ellipses == 2){//3 ellipses: origin, below, and diagonal
        ellipse(x,y,cSize,cSize);
        ellipse(x,y+cSize,cSize,cSize);
        ellipse(x+cSize,y+cSize,cSize,cSize);
      }
      if(ellipses == 3){//4 ellipses in a 2x2 arrangement
        ellipse(x,y,cSize,cSize);
        ellipse(x+cSize,y,cSize,cSize);
        ellipse(x,y+cSize,cSize,cSize);
        ellipse(x+cSize,y+cSize,cSize,cSize);
      }
      float hairAngleInc = TWO_PI/numHair;//angle increment for each piece = 360/number of hair lines
      float hairAngle,hairLength,hairCos,hairSin;
      // With numHair == 0 the loop body never runs, so the infinite
      // increment from the float division above is never used.
      for(int i = 0; i < numHair; i++){
        hairAngle = hairAngleInc * i;
        hairCos = cos(hairAngle);
        hairSin = sin(hairAngle);
        hairLength = random(20);
        stroke(cColor, random(255));
        // Polar to cartesian: one line through the centre, from -hairLength
        // to +hairLength along the current angle.
        line(x + (hairCos * -hairLength),y + (hairSin * -hairLength), x + (hairCos * hairLength),y + (hairSin * hairLength));
      }
    }else{
      // Growing phase: grey circle whose brightness follows |sin(angle)|.
      stroke(abs(sin(angle) * 255));
      ellipse(x,y, cSize * 5, cSize * 5);
    }
  }
}
Ok, now for the explanations.
First, I separated all the variables that were related to one creature from the 'global' ones that determine how the sketch runs (how many creatures get spawned, etc.).
This makes the main code about 25 lines long and altogether a bit below 100 lines which is less than half of the original.
The first part doesn't do anything special. In the draw() function, instead of creating a Creature every frame, I spawn one every Nth frame using the spawnNthFrame variable; this made it easy to see which state of the creature made the sketch slow. If you set that variable to a small number like 2, a new creature is spawned every other frame, so the sketch fills up with creatures very quickly.
The Creature class has all the properties the original code stored in arrays.
Instead of doing
pushMatrix();
translate();
ellipse();
rotate()
ellipse()
popMatrix();
I simply draw the ellipses at x,y.
A little hint on the rotations. I've noticed they were increments
of 90 degrees. Processing has some nice constants for 90,180,360 degrees
in radians: HALF_PI, PI, TWO_PI which can be handy sometimes.
Now for the 'hairy' situation, here's something I commented out for myself:
//if(i == 1) for(int j = 0; j <= 360; j+=70) , well 360/70 is about 5, if (i == 2) , 12 hair
//if = 3-> 360 lines ? do you really need that many lines, that thick ? how about 30 ? 5*12=60, but if you draw the lines through the centre, not from the centre, you can get away with half the lines
So there were 3 loops for drawing lines, each with a different increment. Basically
there were either 360/70 lines, 360/30 lines or 360 lines.
That is roughly 5, 12 and 360 lines. For the 5 and 12 cases, I roughly halved the count by drawing 'diameter' lines across the centre as opposed to 'radius' lines from the centre.
Here's what I mean,
Also I think that 360 lines with that strokeWeight and the jittery motion will probably look like a bunch of lines hard to count, so I thought, why split hairs? :P
Maybe the creature will look pretty similar with about 60 radii which means 30 diameters.
Now to explain a bit of the trig functions used for this.
The main thing is the 'polar to cartesian' coordinates conversion:
Polar would be something like:
"I am moving on a circle to a direction described by an angle (much like one handle of a clock) and radius (distance from centre)."
and Cartesian
"I'm moving based on two axes (horizontal/X and vertical/Y), kind of like the streets of Manhattan, but I cheat and also move diagonally through walls."
If that makes any sense... :)
Anyway, you convert the angle and radius pair to the x and y pair using the formula:
x = cos(angle) * radius
y = sin(angle) * radius
For each line:
angle = hairAngle
radius = hairLength
So the line() with *x + (hairCos * -hairLength)* looks a bit like this:
x + (hairCos * -hairLength) =
move to x and from there move by hairLength
to the left(-) for the current angle (hairCos)
Similar for y, but using sin. This puts the first point of the line in the opposite direction (-hairLength) of the angle, moving from the centre (which is the Creature's x, y), and the second point diagonally opposite it (+hairLength). Imagine drawing 'diagonals' (from (-x,-y) to (+x,+y)), but you also rotate these.
Update
Apparently copy/pasting this code works in javascript too (best viewed in Chromium/Chrome). You can also run it right here:
// Upper bound on the number of creatures the sketch will ever hold.
var maxCreatures = 75;
// How many creatures have been spawned so far.
var numCreatures = 0;
var spawnNthFrame = 50;//spawn a creature every 50 frames
// Spawned creatures, in spawn order.
var creatures = [];

/**
 * p5.js one-time initialisation: creates the 1000x500 canvas, paints it
 * black and disables shape fill.
 */
function setup() {
  // Create the canvas first: a background() issued before createCanvas()
  // paints the default canvas, which createCanvas() then replaces.
  createCanvas(1000,500);
  background(0);
  noFill();
}
/**
 * p5.js per-frame callback. Paints a translucent black rectangle over the
 * whole canvas (instead of clearing it), spawns one new creature on every
 * spawnNthFrame-th frame while below maxCreatures, then updates every
 * creature spawned so far.
 */
function draw() {
  // Alpha 50 black overlay; the rectangle extends one pixel past each edge.
  fill(0, 50);
  rect(-1, -1, 1001, 501);

  var isSpawnFrame = (frameCount % spawnNthFrame === 0);
  if (isSpawnFrame) {
    println("creatures: " + numCreatures);
    var poolHasRoom = (numCreatures < maxCreatures);
    if (poolHasRoom) {
      // Draw the random attributes in constructor-argument order:
      // endSize, x, y, ellipses, hair, strokeW, colour.
      var spawnEndSize = random(5, 20);
      var spawnX = int(random(1000));
      var spawnY = int(random(500));
      var ellipseKind = int(random(4));
      var hairKind = int(random(4));
      var spawnStroke = random(1, 4);
      var spawnColor = color(int(random(20,255)), int(random(20,255)), int(random(20,255)));

      creatures[numCreatures] = new Creature(spawnEndSize, spawnX, spawnY, ellipseKind, hairKind, spawnStroke, spawnColor);
      numCreatures++;
    }
  }

  // Only the first numCreatures entries exist.
  var idx = 0;
  while (idx < numCreatures) {
    creatures[idx].update();
    idx++;
  }
}
/**
 * A single creature. It first grows as a pulsing grey circle; once its size
 * reaches cEndSize it is "matured" and is drawn with its ellipse/hair
 * decoration while drifting towards the mouse with random jitter.
 *
 * @param {number} endSize  size at which the creature becomes matured
 * @param {number} x        spawn x position
 * @param {number} y        spawn y position
 * @param {number} ellipses ellipse decoration variant (1..3 draw something)
 * @param {number} hair     hair decoration variant (1..3 draw something)
 * @param {number} strokeW  stroke weight used once matured
 * @param {*}      c        stroke colour used once matured
 */
function Creature(endSize,x,y,ellipses,hair,strokeW,c){
  this.x = x;
  this.y = y;
  this.ellipses = ellipses;
  this.hair = hair;
  this.numHair = 0;
  this.cStrokeWeight = strokeW;
  this.cColor = c;
  // cXInc / cYInc are kept for parity with the Processing class; nothing in
  // this constructor function reads them.
  this.cXInc = 0;
  this.cYInc = 0;
  // Growth per update while not matured. update() reads this field, so it
  // must be defined; otherwise cSize turns into NaN on the first update.
  this.cSizeInc = 0.01;
  this.cSize = 0;
  this.cEndSize = endSize;
  this.easing = 0.05;
  this.angle = 0.0;
  this.matured = false;
  // Nudge the spawn point towards the middle of the 1000x500 canvas by the
  // final size. The adjustment must be applied to the properties (and to y
  // for the vertical case); adjusting the parameters after copying them has
  // no effect.
  if(this.x >= 500) this.x -= this.cEndSize;
  else this.x += this.cEndSize;
  if(this.y >= 250) this.y -= this.cEndSize;
  else this.y += this.cEndSize;
  // Hair lines are drawn through the centre, so each one counts as two radii.
  if(hair == 1) this.numHair = 3;//~5, half that, draw through centre, etc.
  if(hair == 2) this.numHair = 6;
  if(hair == 3) this.numHair = 30;//numHair stays 0 for any other hair value

  /** Advances the creature by one frame and then draws it. */
  this.update = function(){
    if(this.matured) {
      // Ease towards the mouse and add random jitter on each axis.
      this.x += (((mouseX - this.x) * this.easing) / 60) + random(-5, 6);
      this.y += (((mouseY - this.y) * this.easing) / 60) + random(-5, 6);
    }else {
      // Grow until the target size is reached, then flip to matured.
      if(this.cSize < this.cEndSize) this.cSize += this.cSizeInc;
      else this.matured = true;
      this.angle += 0.05;
    }
    this.draw();
  }

  /** Renders the creature in its current state. */
  this.draw = function(){
    if(this.matured){
      stroke(this.cColor);
      strokeWeight(this.cStrokeWeight);
      if(this.ellipses == 1){//2 ellipses diagonally
        ellipse(this.x,this.y,this.cSize,this.cSize);
        ellipse(this.x+this.cSize,this.y+this.cSize,this.cSize,this.cSize);
      }
      if(this.ellipses == 2){//3 ellipses: origin, below, and diagonal
        ellipse(this.x,this.y,this.cSize,this.cSize);
        ellipse(this.x,this.y+this.cSize,this.cSize,this.cSize);
        ellipse(this.x+this.cSize,this.y+this.cSize,this.cSize,this.cSize);
      }
      if(this.ellipses == 3){//4 ellipses in a 2x2 arrangement
        ellipse(this.x,this.y,this.cSize,this.cSize);
        ellipse(this.x+this.cSize,this.y,this.cSize,this.cSize);
        ellipse(this.x,this.y+this.cSize,this.cSize,this.cSize);
        ellipse(this.x+this.cSize,this.y+this.cSize,this.cSize,this.cSize);
      }
      var hairAngleInc = TWO_PI/this.numHair;//angle increment for each piece = 360/number of hair lines
      var hairAngle,hairLength,hairCos,hairSin;
      // With numHair == 0 the loop body never runs, so the infinite
      // increment from the division above is never used.
      for(var i = 0; i < this.numHair; i++){
        hairAngle = hairAngleInc * i;
        hairCos = cos(hairAngle);
        hairSin = sin(hairAngle);
        hairLength = random(20);
        stroke(this.cColor, random(255));
        // Polar to cartesian: one line through the centre, from -hairLength
        // to +hairLength along the current angle.
        line(this.x + (hairCos * -hairLength),this.y + (hairSin * -hairLength), this.x + (hairCos * hairLength),this.y + (hairSin * hairLength));
      }
    }else{
      // Growing phase: grey circle whose brightness follows |sin(angle)|.
      stroke(abs(sin(this.angle) * 255));
      ellipse(this.x,this.y, this.cSize * 5, this.cSize * 5);
    }
  }
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/p5.js/0.4.4/p5.min.js"></script>
You could use the frameRate(fps) function. It specifies the number of frames to be displayed every second. However, if the processor is not fast enough to maintain the specified rate, it will not be achieved. For example, the function call frameRate(30) will attempt to refresh 30 times a second. It is recommended to set the frame rate within setup().
Remember that if you use draw() without specifying a frame rate, it will run at 60 fps by default.
Well, there's the good old random-pause method. It's the "poor man's profiler".
Just snapshot it a few times. That will show you exactly what's taking the most time. Those are the things you should see if you can make faster.
It will show up in increased framerate.