what highly parallel direct compute code can look like

  • 0 Replies
  • 3906 Views
*

MagnusWootton

  • Replicant
  • ********
  • 646
what highly parallel direct compute code can look like
« on: July 03, 2023, 02:25:04 pm »
Code: "c++"
#include "tracker3d.h"

// Compute-shader handles for the corner-detection / depth pipeline.
// Each one is filled in by CreateComputeShader() in init_tracker(), using the
// HLSL entry point of the same name in tracker3d.hlsl.
CS init_threshbuf_initial;
CS init_threshbuf;
CS count_corners;
CS step_threshbuf;
CS sobel;
CS blur;
CS dmax;
CS sobel_lsbrem;
CS corner;
CS finalcorner;
CS get_output;
CS declump;
CS density;
CS generate3dcorners;
CS interpolate;
CS validate_corner;
CS density_init;

// Compute-shader handles for the newer near-matching memory system
// (also created in init_tracker()).
CS build_keys;
CS init_jumps;
CS init_jumpmap_sim;
CS init_codes;
CS increment_jumpmapiteration;
CS precompute_fixed_jumps;
CS match;
CS trainkey_buildinsertmap;
CS trainkey_update_jumps;
CS trainkey_update_codelist;
CS trainkey_add_codes;

// Memory-system resources, allocated with create_res() in init_tracker().
RES matchmap;
RES jumpmap_sim;
RES jumpmap;
RES codemap;
RES insertmap;
RES densitymem;
RES cornerdistvalid;

// Shader handle declared apart from the others; created in init_tracker() too.
CS densityupdate;

// Image-pipeline resources. All are allocated in init_tracker() except:
//  - camera: re-created from the grabbed screen pixels on every run_tracker() call.
//  - keymap: NOTE(review): no create_res() for keymap is visible in this file
//    section, yet run_tracker() uses it as the build_keys output -- confirm it
//    is created elsewhere.
RES corners3d;
RES camera;
RES blurmap;
RES sobelmap;
RES sobelmap_lsbrem;
RES cornermap;
RES cornermap2;
RES corner_count;
RES threshbuf;
RES output;
RES densitymap;
RES keymap;
RES depthmap;
// Debug buffers built by createdebugbuffer() over `output` and `depthmap`.
BUF outputbuf;
BUF output3dbuf;
// Screen dimensions queried once during static initialisation.
int screenx=GetSystemMetrics(SM_CXSCREEN);
int screeny = GetSystemMetrics(SM_CYSCREEN);
// Screen-sized pixel buffer, 4 bytes per pixel; run_tracker() reads bytes
// 2, 1 and 0 of each pixel. Its initialiser depends on screenx/screeny being
// declared above it. Nothing in this section writes to it -- presumably a
// screen-capture routine elsewhere fills it (TODO confirm).
uchar* image= new uchar[screenx*4*screeny];

void init_tracker(void)
{
 CreateComputeShader( L"tracker3d.hlsl", "declump",                    DEV,    &declump);
 CreateComputeShader( L"tracker3d.hlsl", "blur",                       DEV,    &blur);
 CreateComputeShader( L"tracker3d.hlsl", "dmax",                       DEV,    &dmax);
 CreateComputeShader( L"tracker3d.hlsl", "sobel",                      DEV,    &sobel);
 CreateComputeShader( L"tracker3d.hlsl", "sobel_lsbrem",               DEV,    &sobel_lsbrem);
 CreateComputeShader( L"tracker3d.hlsl", "init_threshbuf_initial",     DEV,    &init_threshbuf_initial);
 CreateComputeShader( L"tracker3d.hlsl", "init_threshbuf",             DEV,    &init_threshbuf);
 CreateComputeShader( L"tracker3d.hlsl", "step_threshbuf",             DEV,    &step_threshbuf);
 CreateComputeShader( L"tracker3d.hlsl", "corner",                     DEV,    &corner);
 CreateComputeShader( L"tracker3d.hlsl", "validate_corner",            DEV,    &validate_corner);
 CreateComputeShader( L"tracker3d.hlsl", "finalcorner",                DEV,    &finalcorner);
 CreateComputeShader( L"tracker3d.hlsl", "count_corners",              DEV,    &count_corners);
 CreateComputeShader( L"tracker3d.hlsl", "density",                    DEV,    &density);
 CreateComputeShader( L"tracker3d.hlsl", "build_keys",                 DEV,    &build_keys);
 CreateComputeShader( L"tracker3d.hlsl", "get_output",                 DEV,    &get_output);
 CreateComputeShader( L"tracker3d.hlsl", "generate3dcorners",          DEV,    &generate3dcorners);
 CreateComputeShader( L"tracker3d.hlsl", "interpolate",                DEV,    &interpolate);
 CreateComputeShader( L"tracker3d.hlsl", "densityupdate",              DEV,    &densityupdate);
 CreateComputeShader( L"tracker3d.hlsl", "density_init",               DEV,    &density_init);

 //MEMORY SYSTEM
 CreateComputeShader( L"tracker3d.hlsl", "build_keys",                 DEV,    &build_keys);
 CreateComputeShader( L"tracker3d.hlsl", "init_jumps",                 DEV,    &init_jumps);
 CreateComputeShader( L"tracker3d.hlsl", "init_codes",                 DEV,    &init_codes);
 CreateComputeShader( L"tracker3d.hlsl", "init_jumpmap_sim",           DEV,    &init_jumpmap_sim);
 CreateComputeShader( L"tracker3d.hlsl", "increment_jumpmapiteration", DEV,    &increment_jumpmapiteration);
 CreateComputeShader( L"tracker3d.hlsl", "precompute_fixed_jumps",     DEV,    &precompute_fixed_jumps);
 CreateComputeShader( L"tracker3d.hlsl", "match",                      DEV,    &match);
 CreateComputeShader( L"tracker3d.hlsl", "trainkey_buildinsertmap",    DEV,    &trainkey_buildinsertmap);
 CreateComputeShader( L"tracker3d.hlsl", "trainkey_update_jumps",      DEV,    &trainkey_update_jumps);
 CreateComputeShader( L"tracker3d.hlsl", "trainkey_update_codelist",   DEV,    &trainkey_update_codelist);
 CreateComputeShader( L"tracker3d.hlsl", "trainkey_add_codes",         DEV,    &trainkey_add_codes);


 IBT* idata=new IBT[RETINA_X*4*RETINA_Y];
 memset(idata,0,RETINA_X*4*RETINA_Y*4);

 codemap         =create_res(ROOTCODEMAP*ROOTCODEMAP,sizeof(UBT),idata);
 matchmap        =create_res(RETINA_X*RETINA_Y*2    ,sizeof(UBT),idata);
 jumpmap_sim     =create_res(256*256*MM_JUMPCAP     ,sizeof(UBT),idata);
 jumpmap         =create_res(256*256                ,sizeof(UBT),idata);
 
 uint imsize=0;
 uint i;
 for(i=0;i<INSERTMIPS;i++){imsize+=(ROOTCODEMAP*ROOTCODEMAP)>>i;}
 insertmap       =create_res(imsize                 ,sizeof(UBT),idata);
 densitymem      =create_res(MM_MAXMEM*2            ,sizeof(IBT),idata);
 cornerdistvalid =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 corners3d       =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 depthmap        =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 blurmap         =create_res(RETINA_X*4*RETINA_Y    ,sizeof(IBT),idata);
 sobelmap        =create_res(RETINA_X*2*RETINA_Y    ,sizeof(IBT),idata);
 densitymap      =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 sobelmap_lsbrem =create_res(RETINA_X*2*RETINA_Y    ,sizeof(IBT),idata);
 cornermap       =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 cornermap2      =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 corner_count    =create_res(RETINA_X*RETINA_Y      ,sizeof(IBT),idata);
 threshbuf       =create_res(5                      ,sizeof(IBT),idata);
 output          =create_res(RETINA_X*4*RETINA_Y    ,sizeof(IBT),idata);

 outputbuf       =createdebugbuffer(output);
 output3dbuf     =createdebugbuffer(depthmap);

 UBT data1[7];
 data1[0].u=RETINA_X;
 data1[1].u=RETINA_Y;
 RES constantbuf=create_res(2, sizeof(UBT), data1);

 call(256/TBLOCK,256/TBLOCK,1,false,0                    ,init_jumps  ,jumpmap    ,1,constantbuf);
 call(ROOTCODEMAP/TBLOCK,ROOTCODEMAP/TBLOCK,1,false,0    ,init_codes  ,codemap    ,1,constantbuf);
 call(MM_ROOTMAXMEM/TBLOCK,MM_ROOTMAXMEM/TBLOCK,1,false,0,density_init,densitymem ,1,constantbuf);

 release_res(constantbuf);


 delete idata;
}


void run_tracker(void)
{
 UBT data1[7];
 data1[0].u=RETINA_X;
 data1[1].u=RETINA_Y;
 RES constantbuf=create_res(2, sizeof(UBT), data1);
 
 //get the camera out of the top left corner.
 IBT* grab_image=new IBT[RETINA_X*4*RETINA_Y];
 
 uint OFFSETX=0;
 uint OFFSETY=200;
 int i,j;
 for(i=0;i<RETINA_X;i++)
 {
  for(j=0;j<RETINA_Y;j++)
  {
   grab_image[(i+j*RETINA_X)*4+0].i=image[((i*2+OFFSETX)+(j*2+OFFSETY)*screenx)*4+2];
   grab_image[(i+j*RETINA_X)*4+1].i=image[((i*2+OFFSETX)+(j*2+OFFSETY)*screenx)*4+1];
   grab_image[(i+j*RETINA_X)*4+2].i=image[((i*2+OFFSETX)+(j*2+OFFSETY)*screenx)*4+0];
   grab_image[(i+j*RETINA_X)*4+3].i=255;
  }
 }
 camera=create_res(RETINA_X*4*RETINA_Y,sizeof(IBT),grab_image);
 delete grab_image;

 
 ////////////////////////////////////
 // GET CORNERS
 //

 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,blur,blurmap,                          2,constantbuf,camera);
 call(1,1,1                                  ,false,0,init_threshbuf_initial,threshbuf,      1,constantbuf);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,sobel_lsbrem,sobelmap_lsbrem,          2,constantbuf,blurmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,sobel_lsbrem,sobelmap,                 2,constantbuf,camera);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,corner,cornermap,                      4,constantbuf,sobelmap_lsbrem,threshbuf,sobelmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,declump,cornermap,                     2,constantbuf,cornermap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,get_output,output,                     3,constantbuf,camera,cornermap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,sobel,sobelmap,                        2,constantbuf,camera);
   //TODO:: this turns into a little tracker, to stabalize the corner response.

 //////////////////////////////////////////
 // ZAZZY NEAR MATCHING MEMORY  (this gets a little addition to it, then it goes into the motor generator as well.)
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,density,densitymap,                    2,constantbuf,sobelmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,build_keys,keymap,                     2,constantbuf,densitymap);
 release_res(constantbuf);
 data1[0].u=RETINA_X;
 data1[1].u=0;
 constantbuf=create_res(2, sizeof(UBT), data1);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,match,matchmap,                        5,constantbuf,keymap,jumpmap_sim,codemap,corner);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,validate_corner,cornerdistvalid,       3,constantbuf,matchmap,corner);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,true ,0,trainkey_buildinsertmap,insertmap,     4,constantbuf,keymap,jumpmap,cornerdistvalid);
 call(256/TBLOCK,256/TBLOCK,1                ,false,0,trainkey_update_jumps,jumpmap,         3,constantbuf,jumpmap,insertmap);
 call(ROOTCODEMAP/TBLOCK,ROOTCODEMAP/TBLOCK,1,false,0,trainkey_update_codelist,codemap,      3,constantbuf,codemap,insertmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,trainkey_add_codes,codemap,            6,constantbuf,codemap,jumpmap,keymap,cornerdistvalid,matchmap);
 //reprecompute the fixed jumps for next time.
 call(256/TBLOCK,256/TBLOCK,1                ,false,0,init_jumpmap_sim,jumpmap_sim,          2,constantbuf,jumpmap);
 for(i=0;i<16;i++)
 {
  call(256/TBLOCK,256/TBLOCK,1               ,false,0,increment_jumpmapiteration,constantbuf,2,constantbuf,constantbuf);
  call(256/TBLOCK,256/TBLOCK,1               ,false,0,precompute_fixed_jumps,jumpmap_sim,    2,constantbuf,jumpmap_sim);
 }
 release_res(constantbuf);

 data1[0].u=RETINA_X;
 data1[1].u=RETINA_Y;
 constantbuf=create_res(2, sizeof(UBT), data1);

 ////////////////////////////////////////////////
 // COMPUTE THE 3D
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,densityupdate,densitymem,              5,constantbuf,densitymem,matchmap,corner,densitymap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,generate3dcorners,corners3d,           5,constantbuf,densitymem,matchmap,corner); //this does the normalization.
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,interpolate,depthmap,                  3,constantbuf,densitymap,cornermap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,dmax,depthmap,                         2,constantbuf,depthmap);
 call(RETINA_X/TBLOCK,RETINA_Y/TBLOCK,1      ,false,0,density,densitymap,                    2,constantbuf,sobelmap);


 //TODO:: your to get camera movement, then average the points over time and store them,
 //  and get rid of any moving points.

 release_res(constantbuf);
}

Here's my system-call side (it invokes the shaders) for this 2D-video-signal to 3D-video-signal converter. Once it has seen enough video, it can actually make photos 3D by themselves. There's more work to go yet, though, and I've never got it working perfectly, but this will be my best attempt yet.

If you ever wanted to know what hyper-parallel code can look like, this is what mine looks like.

 


Requirements for functional equivalence to conscious processing?
by DaltonG (General AI Discussion)
November 19, 2024, 11:56:05 am
Will LLMs ever learn what is ... is?
by HS (Future of AI)
November 10, 2024, 06:28:10 pm
Who's the AI?
by frankinstien (Future of AI)
November 04, 2024, 05:45:05 am
Project Acuitas
by WriterOfMinds (General Project Discussion)
October 27, 2024, 09:17:10 pm
Ai improving AI
by infurl (AI Programming)
October 19, 2024, 03:43:29 am
Atronach's Eye
by WriterOfMinds (Home Made Robots)
October 13, 2024, 09:52:42 pm
Running local AI models
by spydaz (AI Programming)
October 07, 2024, 09:00:53 am
Hi IM BAA---AAACK!!
by MagnusWootton (Home Made Robots)
September 16, 2024, 09:49:10 pm
LLaMA2 Meta's chatbot released
by spydaz (AI News )
August 24, 2024, 02:58:36 pm
ollama and llama3
by spydaz (AI News )
August 24, 2024, 02:55:13 pm
AI controlled F-16, for real!
by frankinstien (AI News )
June 15, 2024, 05:40:28 am
Open AI GPT-4o - audio, vision, text combined reasoning
by MikeB (AI News )
May 14, 2024, 05:46:48 am
OpenAI Speech-to-Speech Reasoning Demo
by MikeB (AI News )
March 31, 2024, 01:00:53 pm
Say good-bye to GPUs...
by MikeB (AI News )
March 23, 2024, 09:23:52 am
Google Bard report
by ivan.moony (AI News )
February 14, 2024, 04:42:23 pm
Elon Musk's xAI Grok Chatbot
by MikeB (AI News )
December 11, 2023, 06:26:33 am

Users Online

280 Guests, 0 Users

Most Online Today: 461. Most Online Ever: 2369 (November 21, 2020, 04:08:13 pm)

Articles