//#version 450
//
//layout(local_size_x = 16, local_size_y = 16) in;
//layout(set = 4, binding = 0, rgba8) uniform image2D targetImage;
//
//
//void main()
//{
//  ivec2 imageSize = imageSize (targetImage);
//
//  if (gl_GlobalInvocationID.x >= imageSize.x && gl_GlobalInvocationID.y >= imageSize.y)
//    return;
//
//  // load the image
//  vec4 color = imageLoad (targetImage, ivec2 (gl_GlobalInvocationID));
//  
//  // get the average
//  float average = 0.2126 * color.r + 0.7152 * color.g + 0.0722 * color.b;
//
//  // store result into result image
//  imageStore(targetImage, ivec2(gl_GlobalInvocationID), vec4(average, average, average, 1.0f));
//
//}
//
//
//
//

/* Start Header *****************************************************************/

/*! \file (e.g. kirsch.comp)

     \author William Zheng, william.zheng, 60001906. Brandon Mak, brandon.hao 390003920.

     \par william.zheng\@digipen.edu. brandon.hao\@digipen.edu.

     \date Sept 20, 2022

     \brief Copyright (C) 2022 DigiPen Institute of Technology.

  Reproduction or disclosure of this file or its contents without the prior written consent of DigiPen Institute of Technology is prohibited. */

  /* End Header *******************************************************************/

#version 450

// Side length of each square convolution mask.
#define MASK_WIDTH 3
// Mask radius. Parenthesised so the macro expands safely inside larger
// expressions (e.g. `2 * HALF_M_WIDTH` or `x - HALF_M_WIDTH`).
#define HALF_M_WIDTH (MASK_WIDTH / 2)
// Side length of the shared-memory tile: the 16-wide work group plus a
// halo of HALF_M_WIDTH texels on each side (16 + MASK_WIDTH - 1 == 18).
#define SHM_WIDTH (16 + MASK_WIDTH - 1)
// Number of masks in the kirsch table.
#define NUM_MASKS 8

// Work-group size: 16 x 16 invocations (local_size_z is left at its default).
layout(local_size_x = 16, local_size_y = 16) in;
// Source image (descriptor set 4, binding 0, rgba8); read with imageLoad in GetImageValues.
layout(set = 4, binding = 0, rgba8) uniform image2D inputImage;
// Destination image (descriptor set 4, binding 1, rgba8); written with imageStore in main.
layout(set = 4, binding = 1, rgba8) uniform image2D resultImage;

// The eight 3x3 Kirsch masks. Every mask has a zero centre, three border
// weights of 5 and five border weights of -3 (so each mask sums to zero);
// each successive mask shifts the run of 5s one step around the border.
const float kirsch[8][3][3] = {
  /* rotation 1 */
  { {  5.0,  5.0,  5.0 },
    { -3.0,  0.0, -3.0 },
    { -3.0, -3.0, -3.0 } },
  /* rotation 2 */
  { {  5.0,  5.0, -3.0 },
    {  5.0,  0.0, -3.0 },
    { -3.0, -3.0, -3.0 } },
  /* rotation 3 */
  { {  5.0, -3.0, -3.0 },
    {  5.0,  0.0, -3.0 },
    {  5.0, -3.0, -3.0 } },
  /* rotation 4 */
  { { -3.0, -3.0, -3.0 },
    {  5.0,  0.0, -3.0 },
    {  5.0,  5.0, -3.0 } },
  /* rotation 5 */
  { { -3.0, -3.0, -3.0 },
    { -3.0,  0.0, -3.0 },
    {  5.0,  5.0,  5.0 } },
  /* rotation 6 */
  { { -3.0, -3.0, -3.0 },
    { -3.0,  0.0,  5.0 },
    { -3.0,  5.0,  5.0 } },
  /* rotation 7 */
  { { -3.0, -3.0,  5.0 },
    { -3.0,  0.0,  5.0 },
    { -3.0, -3.0,  5.0 } },
  /* rotation 8 */
  { { -3.0,  5.0,  5.0 },
    { -3.0,  0.0,  5.0 },
    { -3.0, -3.0, -3.0 } }
};

// Returns the RGB value of inputImage at texel `uv`.
// Any coordinate outside [0, inputImageSize) on either axis yields
// vec3(0), i.e. the image is treated as zero-padded.
vec3 GetImageValues(ivec2 uv, ivec2 inputImageSize)
{
  bool outsideImage = uv.x < 0 || uv.y < 0 ||
                      uv.x >= inputImageSize.x || uv.y >= inputImageSize.y;

  // Out-of-range texels contribute nothing to the convolution.
  if (outsideImage)
    return vec3(0.0f);

  return imageLoad(inputImage, uv).rgb;
}

// Work-group shared tile of source RGB values: the 16 x 16 work-group
// footprint plus two extra rows/columns (a one-texel border on each side),
// giving 18 x 18 entries. main() indexes it as sData[x][y].
shared vec3 sData[16 + 2][16 + 2];

// Kirsch edge detection.
//
// The work group first cooperatively copies its footprint of inputImage,
// plus a halo of HALF_M_WIDTH texels (zero-padded outside the image), into
// the shared tile sData. Each invocation then convolves the neighbourhood
// of its own pixel with all NUM_MASKS Kirsch masks, keeps the per-channel
// maximum response, scales it by 1/8, clamps it to [0, 1] and writes it to
// resultImage with alpha 1.
void main()
{
  // convenient variables
  ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);
  ivec2 localThread = ivec2(gl_LocalInvocationID.xy);
  ivec2 inputImageSize = imageSize(inputImage);

  // Evaluate the macro once into a typed constant so that expressions such
  // as `-halfWidth` do not depend on how the macro is parenthesised.
  const int halfWidth = HALF_M_WIDTH;

  // Top-left source texel covered by this work group's tile (halo included).
  ivec2 tileOrigin = ivec2(gl_WorkGroupID.xy) * ivec2(gl_WorkGroupSize.xy) - ivec2(halfWidth);

  // Load shared memory. The tile is larger than the work group, so each
  // invocation strides across it by the work-group size.
  for (int x = localThread.x; x < SHM_WIDTH; x += int(gl_WorkGroupSize.x))
  {
    for (int y = localThread.y; y < SHM_WIDTH; y += int(gl_WorkGroupSize.y))
    {
      // get from source image (either real values or 0)
      sData[x][y] = GetImageValues(tileOrigin + ivec2(x, y), inputImageSize);
    }
  }

  // wait for shared memory to finish loading
  barrier();

  // Invocations of a partial edge work group that fall outside the result
  // image have nothing to write. They must only leave AFTER the barrier:
  // barrier() has to be reached by every invocation of the group, and these
  // invocations also helped fill the tile that their neighbours read.
  ivec2 resultSize = imageSize(resultImage);
  if (pixel.x >= resultSize.x || pixel.y >= resultSize.y)
    return;

  // Position of this invocation's own pixel inside the shared tile.
  ivec2 centre = localThread + ivec2(halfWidth);

  // max (between all masks), per colour channel
  vec3 maxSum = vec3(0.0f);

  // loop through all masks
  for (int mask = 0; mask < NUM_MASKS; ++mask)
  {
    vec3 sum = vec3(0.0f);

    for (int dx = -halfWidth; dx <= halfWidth; ++dx)
    {
      for (int dy = -halfWidth; dy <= halfWidth; ++dy)
      {
        // Perform convolution using shared memory
        sum += sData[centre.x + dx][centre.y + dy] * kirsch[mask][dx + halfWidth][dy + halfWidth];
      }
    }

    // Get highest sum
    maxSum = max(sum, maxSum);
  }

  // scale the max response by 1/8 and keep it inside [0, 1]
  maxSum = clamp(maxSum / 8.0f, 0.0f, 1.0f);

  // store result into result image
  imageStore(resultImage, pixel, vec4(maxSum, 1.0f));
}