what highly parallel direct compute code can look like

  • 0 Replies
  • 2847 Views
*

MagnusWootton

  • Replicant
  • ********
  • 634
what highly parallel direct compute code can look like
« on: July 03, 2023, 02:25:04 pm »
Code: "c++"
#include "tracker3d.h"

// ---------------------------------------------------------------------------
// Compute-shader handles for the image / corner / depth stages.
// Each one is filled in by a CreateComputeShader() call in init_tracker(),
// using the entry point of the same name in tracker3d.hlsl.
// ---------------------------------------------------------------------------
CS init_threshbuf_initial;
CS init_threshbuf;
CS count_corners;
CS step_threshbuf;
CS sobel;
CS blur;
CS dmax;
CS sobel_lsbrem;
CS corner;
CS finalcorner;
CS get_output;
CS declump;
CS density;
CS generate3dcorners;
CS interpolate;
CS validate_corner;
CS density_init;

// Compute-shader handles for the key-matching "memory" system
// (key building, jump map / code map maintenance, matching and training).
CS build_keys;
CS init_jumps;
CS init_jumpmap_sim;
CS init_codes;
CS increment_jumpmapiteration;
CS precompute_fixed_jumps;
CS match;
CS trainkey_buildinsertmap;
CS trainkey_update_jumps;
CS trainkey_update_codelist;
CS trainkey_add_codes;

// GPU resources used by the memory system. All are created (zero-seeded) in
// init_tracker(); jumpmap, codemap and densitymem are additionally initialised
// there by the init_jumps / init_codes / density_init shaders.
RES matchmap;
RES jumpmap_sim;
RES jumpmap;
RES codemap;
RES insertmap;
RES densitymem;
RES cornerdistvalid;

// Shader handle for the density-memory update pass (created in init_tracker()).
CS densityupdate;

// GPU resources for the per-frame image pipeline. Created in init_tracker(),
// with two exceptions visible in this file:
//   - camera is (re)created from the grabbed frame inside run_tracker();
//   - keymap is written by build_keys in run_tracker() but no create_res()
//     call for it appears here. NOTE(review): confirm where keymap is created.
RES corners3d;
RES camera;
RES blurmap;
RES sobelmap;
RES sobelmap_lsbrem;
RES cornermap;
RES cornermap2;
RES corner_count;
RES threshbuf;
RES output;
RES densitymap;
RES keymap;
RES depthmap;
// Debug buffers created by createdebugbuffer() in init_tracker():
// outputbuf wraps `output`, output3dbuf wraps `depthmap`.
BUF outputbuf;
BUF output3dbuf;
// Primary-screen dimensions in pixels, queried once at static-initialisation time.
int screenx=GetSystemMetrics(SM_CXSCREEN);
int screeny = GetSystemMetrics(SM_CYSCREEN);
// Screen-sized byte buffer, 4 bytes per pixel; run_tracker() samples its frame
// from here. It must be declared after screenx/screeny because its size depends
// on them. Nothing in this listing fills it — presumably a screen-capture
// routine elsewhere does (TODO confirm).
uchar* image= new uchar[screenx*4*screeny];

void init_tracker(void)
{
 CreateComputeShader( L"tracker3d.hlsl", "declump",                    DEV,    &declump);
 CreateComputeShader( L"tracker3d.hlsl", "blur",                       DEV,    &blur);
 CreateComputeShader( L"tracker3d.hlsl", "dmax",                       DEV,    &dmax);
 CreateComputeShader( L"tracker3d.hlsl", "sobel",                      DEV,    &sobel);
 CreateComputeShader( L"tracker3d.hlsl", "sobel_lsbrem",               DEV,    &sobel_lsbrem);
 CreateComputeShader( L"tracker3d.hlsl", "init_threshbuf_initial",     DEV,    &init_threshbuf_initial);
 CreateComputeShader( L"tracker3d.hlsl", "init_threshbuf",             DEV,    &init_threshbuf);
 CreateComputeShader( L"tracker3d.hlsl", "step_threshbuf",             DEV,    &step_threshbuf);
 CreateComputeShader( L"tracker3d.hlsl", "corner",                     DEV,    &corner);
 CreateComputeShader( L"tracker3d.hlsl", "validate_corner",            DEV,    &validate_corner);
 CreateComputeShader( L"tracker3d.hlsl", "finalcorner",                DEV,    &finalcorner);
 CreateComputeShader( L"tracker3d.hlsl", "count_corners",              DEV,    &count_corners);
 CreateComputeShader( L"tracker3d.hlsl", "density",                    DEV,    &density);
 CreateComputeShader( L"tracker3d.hlsl", "build_keys",                 DEV,    &build_keys);
 CreateComputeShader( L"tracker3d.hlsl", "get_output",                 DEV,    &get_output);
 CreateComputeShader( L"tracker3d.hlsl", "generate3dcorners",          DEV,    &generate3dcorners);
 CreateComputeShader( L"tracker3d.hlsl", "interpolate",                DEV,    &interpolate);
 CreateComputeShader( L"tracker3d.hlsl", "densityupdate",              DEV,    &densityupdate);
 CreateComputeShader( L"tracker3d.hlsl", "density_init",               DEV,    &density_init);

 //MEMORY SYSTEM
 CreateComputeShader( L"tracker3d.hlsl", "build_keys",                 DEV,    &build_keys);
 CreateComputeShader( L"tracker3d.hlsl", "init_jumps",                 DEV,    &init_jumps);
 CreateComputeShader( L"tracker3d.hlsl", "init_codes",                 DEV,    &init_codes);
 CreateComputeShader( L"tracker3d.hlsl", "init_jumpmap_sim",           DEV,    &init_jumpmap_sim);
 CreateComputeShader( L"tracker3d.hlsl", "increment_jumpmapiteration", DEV,    &increment_jumpmapiteration);
 CreateComputeShader( L"tracker3d.hlsl", "precompute_fixed_jumps",     DEV,    &precompute_fixed_jumps);
 CreateComputeShader( L"tracker3d.hlsl", "match",                      DEV,    &match);
 CreateComputeShader( L"tracker3d.hlsl", "trainkey_buildinsertmap",    DEV,    &trainkey_buildinsertmap);
 CreateComputeShader( L"tracker3d.hlsl", "trainkey_update_jumps",      DEV,    &trainkey_update_jumps);
 CreateComputeShader( L"tracker3d.hlsl", "trainkey_update_codelist",   DEV,    &trainkey_update_codelist);
 CreateComputeShader( L"tracker3d.hlsl", "trainkey_add_codes",         DEV,    &trainkey_add_codes);


 IBT* idata=new IBT[RETINA_X*4*RETINA_Y];
 memset(idata,0,RETINA_X*4*RETINA_Y*4);

 codemap         =create_res(ROOTCODEMAP*ROOTCODEMAP,sizeof(UBT),idata);
 matchmap        =create_res(RETINA_X*RETINA_Y*2    ,sizeof(UBT),idata);
 jumpmap_sim     =create_res(256*256*MM_JUMPCAP     ,sizeof(UBT),idata);
 jumpmap         =create_res(256*256                ,sizeof(UBT),idata);
 
 uint imsize=0;
 uint i;
 for(i=0;i<INSERTMIPS;i++){imsize+=(ROOTCODEMAP*ROOTCODEMAP)>>i;}
 insertmap       =create_res(imsize                 ,sizeof(UBT),idata);
 densitymem      =create_res(MM_MAXMEM*2            ,sizeof(IBT),idata);
 cornerdistvalid =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 corners3d       =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 depthmap        =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 blurmap         =create_res(RETINA_X*4*RETINA_Y    ,sizeof(IBT),idata);
 sobelmap        =create_res(RETINA_X*2*RETINA_Y    ,sizeof(IBT),idata);
 densitymap      =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 sobelmap_lsbrem =create_res(RETINA_X*2*RETINA_Y    ,sizeof(IBT),idata);
 cornermap       =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 cornermap2      =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 corner_count    =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 threshbuf       =create_res(5                      ,sizeof(IBT),idata);
 output          =create_res(RETINA_X*4*RETINA_Y    ,sizeof(IBT),idata);

 outputbuf       =createdebugbuffer(output);
 output3dbuf     =createdebugbuffer(depthmap);

 UBT data1[7];
 data1[0].u=RETINA_X;
 data1[1].u=RETINA_Y;
 RES constantbuf=create_res(2, sizeof(UBT), data1);

 call(256/TBLOCK,256/TBLOCK,1,false,0                    ,init_jumps  ,jumpmap    ,1,constantbuf);
 call(ROOTCODEMAP/TBLOCK,ROOTCODEMAP/TBLOCK,1,false,0    ,init_codes  ,codemap    ,1,constantbuf);
 call(MM_ROOTMAXMEM/TBLOCK,MM_ROOTMAXMEM/TBLOCK,1,false,0,density_init,densitymem ,1,constantbuf);

 release_res(constantbuf);


 delete idata;
}


void run_tracker(void)
{
 UBT data1[7];
 data1[0].u=RETINA_X;
 data1[1].u=RETINA_Y;
 RES constantbuf=create_res(2, sizeof(UBT), data1);
 
 //get the camera out of the top left corner.
 IBT* grab_image=new IBT[RETINA_X*4*RETINA_Y];
 
 uint OFFSETX=0;
 uint OFFSETY=200;
 int i,j;
 for(i=0;i<RETINA_X;i++)
 {
  for(j=0;j<RETINA_Y;j++)
  {
   grab_image[(i+j*RETINA_X)*4+0].i=image[((i*2+OFFSETX)+(j*2+OFFSETY)*screenx)*4+2];
   grab_image[(i+j*RETINA_X)*4+1].i=image[((i*2+OFFSETX)+(j*2+OFFSETY)*screenx)*4+1];
   grab_image[(i+j*RETINA_X)*4+2].i=image[((i*2+OFFSETX)+(j*2+OFFSETY)*screenx)*4+0];
   grab_image[(i+j*RETINA_X)*4+3].i=255;
  }
 }
 camera=create_res(RETINA_X*4*RETINA_Y,sizeof(IBT),grab_image);
 delete grab_image;

 
 ////////////////////////////////////
 // GET CORNERS
 //

 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,blur,blurmap,                          2,constantbuf,camera);
 call(1,1,1                                  ,false,0,init_threshbuf_initial,threshbuf,      1,constantbuf);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,sobel_lsbrem,sobelmap_lsbrem,          2,constantbuf,blurmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,sobel_lsbrem,sobelmap,                 2,constantbuf,camera);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,corner,cornermap,                      4,constantbuf,sobelmap_lsbrem,threshbuf,sobelmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,declump,cornermap,                     2,constantbuf,cornermap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,get_output,output,                     3,constantbuf,camera,cornermap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,sobel,sobelmap,                        2,constantbuf,camera);
   //TODO:: this turns into a little tracker, to stabalize the corner response.

 //////////////////////////////////////////
 // ZAZZY NEAR MATCHING MEMORY  (this gets a little addition to it, then it goes into the motor generator as well.)
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,density,densitymap,                    2,constantbuf,sobelmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,build_keys,keymap,                     2,constantbuf,densitymap);
 release_res(constantbuf);
 data1[0].u=RETINA_X;
 data1[1].u=0;
 constantbuf=create_res(2, sizeof(UBT), data1);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,match,matchmap,                        5,constantbuf,keymap,jumpmap_sim,codemap,corner);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,validate_corner,cornerdistvalid,       3,constantbuf,matchmap,corner);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,true ,0,trainkey_buildinsertmap,insertmap,     4,constantbuf,keymap,jumpmap,cornerdistvalid);
 call(256/TBLOCK,256/TBLOCK,1                ,false,0,trainkey_update_jumps,jumpmap,         3,constantbuf,jumpmap,insertmap);
 call(ROOTCODEMAP/TBLOCK,ROOTCODEMAP/TBLOCK,1,false,0,trainkey_update_codelist,codemap,      3,constantbuf,codemap,insertmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,trainkey_add_codes,codemap,            6,constantbuf,codemap,jumpmap,keymap,cornerdistvalid,matchmap);
 //reprecompute the fixed jumps for next time.
 call(256/TBLOCK,256/TBLOCK,1                ,false,0,init_jumpmap_sim,jumpmap_sim,          2,constantbuf,jumpmap);
 for(i=0;i<16;i++)
 {
  call(256/TBLOCK,256/TBLOCK,1               ,false,0,increment_jumpmapiteration,constantbuf,2,constantbuf,constantbuf);
  call(256/TBLOCK,256/TBLOCK,1               ,false,0,precompute_fixed_jumps,jumpmap_sim,    2,constantbuf,jumpmap_sim);
 }
 release_res(constantbuf);

 data1[0].u=RETINA_X;
 data1[1].u=RETINA_Y;
 constantbuf=create_res(2, sizeof(UBT), data1);

 ////////////////////////////////////////////////
 // COMPUTE THE 3D
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,densityupdate,densitymem,              5,constantbuf,densitymem,matchmap,corner,densitymap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,generate3dcorners,corners3d,           5,constantbuf,densitymem,matchmap,corner); //this does the normalization.
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,interpolate,depthmap,                  3,constantbuf,densitymap,cornermap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,dmax,depthmap,                         2,constantbuf,depthmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,density,densitymap,                    2,constantbuf,sobelmap);


 //TODO:: your to get camera movement, then average the points over time and store them,
 //  and get rid of any moving points.

 release_res(constantbuf);
}

Here's my system-call side (the host code that invokes the shaders) for this 2D-video-to-3D-video converter. Once it has seen enough video, it can actually turn still photos into 3D on their own. There's more work to go yet, though, and I've never got it working perfectly, but this will be my best attempt yet.

If you ever wanted to know what hyper-parallel code can look like, this is what mine looks like.

 


OpenAI Speech-to-Speech Reasoning Demo
by MikeB (AI News )
March 31, 2024, 01:00:53 pm
Say good-bye to GPUs...
by MikeB (AI News )
March 23, 2024, 09:23:52 am
Google Bard report
by ivan.moony (AI News )
February 14, 2024, 04:42:23 pm
Elon Musk's xAI Grok Chatbot
by MikeB (AI News )
December 11, 2023, 06:26:33 am
Nvidia Hype
by 8pla.net (AI News )
December 06, 2023, 10:04:52 pm
How will the OpenAI CEO being Fired affect ChatGPT?
by 8pla.net (AI News )
December 06, 2023, 09:54:25 pm
Independent AI sovereignties
by WriterOfMinds (AI News )
November 08, 2023, 04:51:21 am
LLaMA2 Meta's chatbot released
by 8pla.net (AI News )
October 18, 2023, 11:41:21 pm

Users Online

197 Guests, 1 User
Users active in past 15 minutes:
ivan.moony
[Trusty Member]

Most Online Today: 359. Most Online Ever: 2369 (November 21, 2020, 04:08:13 pm)

Articles