Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 172 additions & 15 deletions include/k2c_conv_transpose_layer.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,39 +28,47 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,

const size_t ker_dim12 = n_channels * n_filters;

size_t cs = 0;
size_t ce = 0;
size_t ts = 0;
size_t ks = 0;
// changed some names for refactor clarity
size_t output_start_idx = 0; // was: cs
size_t output_end_idx = 0; // was: ce
size_t output_raw_idx = 0; // was: ts
size_t kernel_offset = 0; // was: ks
Comment on lines +32 to +35
Copy link

Copilot AI Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the comment on lines 31-35, the variable names are being renamed for clarity (e.g., 'cs' to 'output_start_idx'). However, these comments show the old variable names in a format that suggests they are being defined (e.g., "// cs"). This notation is somewhat ambiguous. Consider clarifying this with a format like "// was: cs" or "// renamed from: cs" to make it clear these are the old names being replaced.

Suggested change
size_t output_start_idx = 0; // cs
size_t output_end_idx = 0; // ce
size_t output_raw_idx = 0; // ts
size_t kernel_offset = 0; // ks
size_t output_start_idx = 0; // was: cs
size_t output_end_idx = 0; // was: ce
size_t output_raw_idx = 0; // was: ts
size_t kernel_offset = 0; // was: ks

Copilot uses AI. Check for mistakes.

for (size_t f = 0; f < n_filters; ++f)
{
for (size_t ch = 0; ch < n_channels; ++ch)
{
for (size_t t = 0; t < n_height; ++t)
{
ts = t * stride;
if (ts > start_crop)
output_raw_idx = t * stride;

// start index
if (output_raw_idx > start_crop)
{
cs = ts - start_crop;
output_start_idx = output_raw_idx - start_crop;
}
else
{
cs = 0;
output_start_idx = 0;
}
if (ts + k_size - start_crop > out_height)

// end index
if (output_raw_idx + k_size - start_crop > out_height)
{
ce = out_height;
output_end_idx = out_height;
}
else
{
ce = ts + k_size - start_crop;
output_end_idx = output_raw_idx + k_size - start_crop;
}
ks = cs - (ts - start_crop);
for (size_t i = 0; i < ce - cs; ++i)

kernel_offset = output_start_idx - (output_raw_idx - start_crop);

// convolution
for (size_t i = 0; i < output_end_idx - output_start_idx; ++i)
{
output->array[(i + cs) * n_filters + f] +=
kernel->array[(i + ks) * ker_dim12 + f * n_channels + ch] *
output->array[(i + output_start_idx) * n_filters + f] +=
kernel->array[(i + kernel_offset) * ker_dim12 + f * n_channels + ch] *
input->array[t * n_channels + ch];
}
}
Expand All @@ -71,3 +79,152 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,
k2c_bias_add(output, bias);
activation(output->array, output->numel);
}

/**
* 2D Transposed Convolution (Deconvolution).
* Assumes a "channels last" structure.
*
* :param output: output tensor.
* :param input: input tensor.
* :param kernel: kernel tensor.
* :param bias: bias tensor.
* :param stride: array[2] {stride_height, stride_width}.
* :param dilation: array[2] {dilation_height, dilation_width}.
* (Note: Logic below assumes dilation is 1 for the optimized bounds check).
* :param padding: array[2] {crop_top, crop_left}.
* Amount to crop from the output (inverse of padding).
* :param activation: activation function to apply to output.
*/
/**
 * 2D Transposed Convolution (Deconvolution).
 * Assumes a "channels last" structure for input, output, and kernel.
 *
 * :param output: output tensor, shape {out_rows, out_cols, n_filters}.
 * :param input: input tensor, shape {in_rows, in_cols, in_channels}.
 * :param kernel: kernel tensor, shape {k_rows, k_cols, in_channels, n_filters}
 *                (output channels in the last dimension).
 * :param bias: bias tensor, added to the output after accumulation.
 * :param stride: array[2] {stride_height, stride_width}.
 * :param dilation: array[2] {dilation_height, dilation_width}.
 *                  Only a dilation rate of 1 is supported; the code
 *                  generator (weights2c.py) rejects other values, and this
 *                  implementation ignores the parameter (see note below).
 * :param padding: array[2] {crop_top, crop_left}.
 *                 Amount to crop from the output (inverse of padding).
 * :param activation: activation function applied to the output in place.
 */
void k2c_conv2d_transpose(k2c_tensor *output, const k2c_tensor *input,
                          const k2c_tensor *kernel, const k2c_tensor *bias,
                          const size_t *stride, const size_t *dilation,
                          const size_t *padding, k2c_activationType *activation)
{
    // Dilation > 1 is not implemented; the parameter is kept for interface
    // stability and validated upstream in weights2c.py. Marking it used
    // here keeps -Wunused-parameter clean and documents the limitation at
    // the point of use.
    (void)dilation;

    // Initialize output memory to zero; the loops below accumulate into it.
    memset(output->array, 0, output->numel * sizeof(output->array[0]));

    // --- Dimensions ---
    const size_t in_rows = input->shape[0];
    const size_t in_cols = input->shape[1];
    const size_t in_channels = input->shape[2];

    // Kernel shape: {rows, cols, in_channels, out_channels}
    const size_t k_rows = kernel->shape[0];
    const size_t k_cols = kernel->shape[1];
    const size_t n_filters = kernel->shape[3];

    const size_t out_rows = output->shape[0];
    const size_t out_cols = output->shape[1];

    // Access strides/padding from arrays
    const size_t stride_h = stride[0];
    const size_t stride_w = stride[1];
    const size_t crop_h = padding[0];
    const size_t crop_w = padding[1];

    // Pre-calculate dimensional steps for the kernel.
    // Flattened index math: z0 * (cols*in*out) + z1 * (in*out) + q * out + k,
    // matching a kernel layout of (rows, cols, in_channels, out_channels),
    // i.e. output channels in the last (fastest-varying) dimension.
    const size_t k_step_row = kernel->shape[1] * kernel->shape[2] * kernel->shape[3];
    const size_t k_step_col = kernel->shape[2] * kernel->shape[3];
    const size_t k_step_in = kernel->shape[3];

    // --- Window Variables ---
    // Vertical (rows)
    size_t row_raw_idx, row_start_idx, row_end_idx, row_ker_offset;
    // Horizontal (cols)
    size_t col_raw_idx, col_start_idx, col_end_idx, col_ker_offset;

    // Loop 1: Filters (output channels)
    for (size_t f = 0; f < n_filters; ++f)
    {
        // Loop 2: Input channels
        for (size_t ch = 0; ch < in_channels; ++ch)
        {
            // Loop 3: Input rows
            for (size_t r = 0; r < in_rows; ++r)
            {
                // === Vertical bounds calculation (same scheme as 1D) ===
                row_raw_idx = r * stride_h;

                // Clamp top: output rows below crop_h are discarded.
                if (row_raw_idx > crop_h)
                    row_start_idx = row_raw_idx - crop_h;
                else
                    row_start_idx = 0;

                // Clamp bottom: do not write past the output extent.
                if (row_raw_idx + k_rows - crop_h > out_rows)
                    row_end_idx = out_rows;
                else
                    row_end_idx = row_raw_idx + k_rows - crop_h;

                // Kernel offset (vertical). NOTE: when row_raw_idx < crop_h
                // the inner subtraction wraps (size_t), but the outer
                // subtraction wraps back, yielding crop_h - row_raw_idx —
                // correct by modular arithmetic.
                row_ker_offset = row_start_idx - (row_raw_idx - crop_h);

                // Loop 4: Input columns
                for (size_t c = 0; c < in_cols; ++c)
                {
                    // === Horizontal bounds calculation ===
                    col_raw_idx = c * stride_w;

                    // Clamp left
                    if (col_raw_idx > crop_w)
                        col_start_idx = col_raw_idx - crop_w;
                    else
                        col_start_idx = 0;

                    // Clamp right
                    if (col_raw_idx + k_cols - crop_w > out_cols)
                        col_end_idx = out_cols;
                    else
                        col_end_idx = col_raw_idx + k_cols - crop_w;

                    // Kernel offset (horizontal); wraparound-safe as above.
                    col_ker_offset = col_start_idx - (col_raw_idx - crop_w);

                    // Pre-calculate the input value: it is invariant over
                    // the spatial accumulation loops below.
                    // Input index: r * (cols*channels) + c * channels + ch
                    float input_val = input->array[r * (in_cols * in_channels) + c * in_channels + ch];

                    // === Inner loops (spatial accumulation) ===
                    // Iterate over the VALID intersection of the scattered
                    // kernel window and the (cropped) output.
                    size_t valid_h = row_end_idx - row_start_idx;
                    size_t valid_w = col_end_idx - col_start_idx;

                    for (size_t kr = 0; kr < valid_h; ++kr)
                    {
                        for (size_t kc = 0; kc < valid_w; ++kc)
                        {
                            // 1. Output index
                            //    row (kr + row_start_idx), col (kc + col_start_idx), channel f
                            size_t out_r = kr + row_start_idx;
                            size_t out_c = kc + col_start_idx;

                            size_t out_idx = out_r * (out_cols * n_filters) + out_c * n_filters + f;

                            // 2. Kernel index
                            //    row (kr + row_ker_offset), col (kc + col_ker_offset),
                            //    in-channel ch, out-channel f
                            size_t k_r = kr + row_ker_offset;
                            size_t k_c = kc + col_ker_offset;

                            size_t ker_idx = k_r * k_step_row + k_c * k_step_col + ch * k_step_in + f;

                            // 3. Accumulate
                            output->array[out_idx] += kernel->array[ker_idx] * input_val;
                        }
                    }
                }
            }
        }
    }

    k2c_bias_add(output, bias);
    activation(output->array, output->numel);
}
3 changes: 3 additions & 0 deletions include/k2c_include.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ void k2c_upsampling3d(k2c_tensor *output, const k2c_tensor *input, const size_t
void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,
const k2c_tensor *bias, const size_t stride, const size_t start_crop,
k2c_activationType *activation);
void k2c_conv2d_transpose(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,
const k2c_tensor *bias, const size_t *stride, const size_t *dilation,
const size_t *padding, k2c_activationType *activation);

// Core Layers
void k2c_dense(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,
Expand Down
Loading