diff --git a/include/k2c_conv_transpose_layer.c b/include/k2c_conv_transpose_layer.c
index f206889..8512a11 100644
--- a/include/k2c_conv_transpose_layer.c
+++ b/include/k2c_conv_transpose_layer.c
@@ -28,10 +28,11 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,
     const size_t ker_dim12 = n_channels * n_filters;
 
-    size_t cs = 0;
-    size_t ce = 0;
-    size_t ts = 0;
-    size_t ks = 0;
+    // changed some names for refactor clarity
+    size_t output_start_idx = 0; // cs
+    size_t output_end_idx = 0; // ce
+    size_t output_raw_idx = 0; // ts
+    size_t kernel_offset = 0; // ks
 
     for (size_t f = 0; f < n_filters; ++f)
     {
@@ -39,28 +40,35 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,
         {
             for (size_t t = 0; t < n_height; ++t)
             {
-                ts = t * stride;
-                if (ts > start_crop)
+                output_raw_idx = t * stride;
+
+                // start index
+                if (output_raw_idx > start_crop)
                 {
-                    cs = ts - start_crop;
+                    output_start_idx = output_raw_idx - start_crop;
                 }
                 else
                 {
-                    cs = 0;
+                    output_start_idx = 0;
                 }
-                if (ts + k_size - start_crop > out_height)
+
+                // end index
+                if (output_raw_idx + k_size - start_crop > out_height)
                 {
-                    ce = out_height;
+                    output_end_idx = out_height;
                 }
                 else
                 {
-                    ce = ts + k_size - start_crop;
+                    output_end_idx = output_raw_idx + k_size - start_crop;
                 }
-                ks = cs - (ts - start_crop);
-                for (size_t i = 0; i < ce - cs; ++i)
+
+                kernel_offset = output_start_idx - (output_raw_idx - start_crop);
+
+                // convolution
+                for (size_t i = 0; i < output_end_idx - output_start_idx; ++i)
                 {
-                    output->array[(i + cs) * n_filters + f] +=
-                        kernel->array[(i + ks) * ker_dim12 + f * n_channels + ch] *
+                    output->array[(i + output_start_idx) * n_filters + f] +=
+                        kernel->array[(i + kernel_offset) * ker_dim12 + f * n_channels + ch] *
                         input->array[t * n_channels + ch];
                 }
             }
@@ -71,3 +79,152 @@ void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input,
     k2c_bias_add(output, bias);
     activation(output->array, output->numel);
 }
+
+/**
+ * 2D Transposed Convolution (Deconvolution).
+ * Assumes a "channels last" structure.
+ *
+ * :param output: output tensor.
+ * :param input: input tensor.
+ * :param kernel: kernel tensor.
+ * :param bias: bias tensor.
+ * :param stride: array[2] {stride_height, stride_width}.
+ * :param dilation: array[2] {dilation_height, dilation_width}.
+ *     (Note: Logic below assumes dilation is 1 for the optimized bounds check).
+ * :param padding: array[2] {crop_top, crop_left}.
+ *     Amount to crop from the output (inverse of padding).
+ * :param activation: activation function to apply to output.
+ */
+void k2c_conv2d_transpose(k2c_tensor *output, const k2c_tensor *input,
+                          const k2c_tensor *kernel, const k2c_tensor *bias,
+                          const size_t *stride, const size_t *dilation,
+                          const size_t *padding, k2c_activationType *activation)
+{
+    // Initialize output memory to zero
+    memset(output->array, 0, output->numel * sizeof(output->array[0]));
+
+    // --- Dimensions ---
+    const size_t in_rows = input->shape[0];
+    const size_t in_cols = input->shape[1];
+    const size_t in_channels = input->shape[2];
+
+    // Kernel Shape: {Rows, Cols, InChannels, OutChannels} based on reference
+    const size_t k_rows = kernel->shape[0];
+    const size_t k_cols = kernel->shape[1];
+    const size_t n_filters = kernel->shape[3];
+
+    const size_t out_rows = output->shape[0];
+    const size_t out_cols = output->shape[1];
+
+    // Access strides/padding from arrays
+    const size_t stride_h = stride[0];
+    const size_t stride_w = stride[1];
+    const size_t crop_h = padding[0];
+    const size_t crop_w = padding[1];
+
+    // Pre-calculate dimensional steps for Kernel
+    // Kernel index math: z0 * (cols*in*out) + z1 * (in*out) + q * (out) + k
+    // Note: This matches the "Out-Channel Last" memory layout of the reference.
+    const size_t k_step_row = kernel->shape[1] * kernel->shape[2] * kernel->shape[3];
+    const size_t k_step_col = kernel->shape[2] * kernel->shape[3];
+    const size_t k_step_in = kernel->shape[3];
+
+    // --- Window Variables ---
+    // Vertical (Rows)
+    size_t row_raw_idx, row_start_idx, row_end_idx, row_ker_offset;
+    // Horizontal (Cols)
+    size_t col_raw_idx, col_start_idx, col_end_idx, col_ker_offset;
+
+    // Loop 1: Filters (Output Channels)
+    for (size_t f = 0; f < n_filters; ++f)
+    {
+        // Loop 2: Input Channels
+        for (size_t ch = 0; ch < in_channels; ++ch)
+        {
+            // Loop 3: Input Rows
+            for (size_t r = 0; r < in_rows; ++r)
+            {
+                // === Vertical Bounds Calculation (Similar to 1D) ===
+                row_raw_idx = r * stride_h;
+
+                // Clamp Top
+                if (row_raw_idx > crop_h)
+                    row_start_idx = row_raw_idx - crop_h;
+                else
+                    row_start_idx = 0;
+
+                // Clamp Bottom
+                if (row_raw_idx + k_rows - crop_h > out_rows)
+                    row_end_idx = out_rows;
+                else
+                    row_end_idx = row_raw_idx + k_rows - crop_h;
+
+                // Kernel Offset (Vertical)
+                row_ker_offset = row_start_idx - (row_raw_idx - crop_h);
+
+
+                // Loop 4: Input Columns
+                for (size_t c = 0; c < in_cols; ++c)
+                {
+                    // === Horizontal Bounds Calculation ===
+                    col_raw_idx = c * stride_w;
+
+                    // Clamp Left
+                    if (col_raw_idx > crop_w)
+                        col_start_idx = col_raw_idx - crop_w;
+                    else
+                        col_start_idx = 0;
+
+                    // Clamp Right
+                    if (col_raw_idx + k_cols - crop_w > out_cols)
+                        col_end_idx = out_cols;
+                    else
+                        col_end_idx = col_raw_idx + k_cols - crop_w;
+
+                    // Kernel Offset (Horizontal)
+                    col_ker_offset = col_start_idx - (col_raw_idx - crop_w);
+
+                    // Pre-calculate Input Value
+                    // Input Index: r * (cols*ch) + c * (ch) + ch
+                    float input_val = input->array[r * (in_cols * in_channels) + c * in_channels + ch];
+
+                    // === Inner Loops (Spatial Accumulation) ===
+                    // Iterating over the VALID intersection of kernel and output
+                    size_t valid_h = row_end_idx - row_start_idx;
+                    size_t valid_w = col_end_idx - col_start_idx;
+
+                    for (size_t kr = 0; kr < valid_h; ++kr)
+                    {
+                        for (size_t kc = 0; kc < valid_w; ++kc)
+                        {
+                            // 1. Output Index
+                            //    Row: (kr + row_start_idx)
+                            //    Col: (kc + col_start_idx)
+                            //    Channel: f
+                            size_t out_r = kr + row_start_idx;
+                            size_t out_c = kc + col_start_idx;
+
+                            size_t out_idx = out_r * (out_cols * n_filters) + out_c * n_filters + f;
+
+                            // 2. Kernel Index
+                            //    Row: (kr + row_ker_offset)
+                            //    Col: (kc + col_ker_offset)
+                            //    InChannel: ch
+                            //    OutChannel: f
+                            size_t k_r = kr + row_ker_offset;
+                            size_t k_c = kc + col_ker_offset;
+
+                            size_t ker_idx = k_r * k_step_row + k_c * k_step_col + ch * k_step_in + f;
+
+                            // 3. Accumulate
+                            output->array[out_idx] += kernel->array[ker_idx] * input_val;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    k2c_bias_add(output, bias);
+    activation(output->array, output->numel);
+}
diff --git a/include/k2c_include.h b/include/k2c_include.h
index c30ce0d..282f173 100644
--- a/include/k2c_include.h
+++ b/include/k2c_include.h
@@ -68,6 +68,9 @@ void k2c_upsampling3d(k2c_tensor *output, const k2c_tensor *input, const size_t
 void k2c_conv1d_transpose(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,
                           const k2c_tensor *bias, const size_t stride, const size_t start_crop, k2c_activationType *activation);
+void k2c_conv2d_transpose(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,
+                          const k2c_tensor *bias, const size_t *stride, const size_t *dilation,
+                          const size_t *padding, k2c_activationType *activation);
 
 // Core Layers
 void k2c_dense(k2c_tensor *output, const k2c_tensor *input, const k2c_tensor *kernel,
diff --git a/keras2c/layer2c.py b/keras2c/layer2c.py
index 390cae8..70d0771 100644
--- a/keras2c/layer2c.py
+++ b/keras2c/layer2c.py
@@ -8,8 +8,13 @@
 """
 
 # imports
-from keras2c.io_parsing import layer_type, get_model_io_names, get_all_io_names, get_layer_io_names, flatten
-
+from keras2c.io_parsing import (
+    flatten,
+    get_all_io_names,
+    get_layer_io_names,
+    get_model_io_names,
+    layer_type,
+)
 
 # Original author
 # __author__ = "Rory Conlin"
@@ -23,7 +28,7 @@ __email__ = "guptaa@fusion.gat.com"
 
 
-class Layers2C():
+class Layers2C:
     """Creates an object to parse and write layer functions.
Args: @@ -167,88 +172,166 @@ def flatten_any(x): def _write_layer_TimeDistributed(self, layer, inputs, outputs, i): # nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) - self.layers += 'for(size_t i=0; i<' + layer.name + \ - '_timesteps; ++i) { \n' + self.layers += "for(size_t i=0; i<" + layer.name + "_timesteps; ++i) { \n" if inputs in self.model_inputs: - self.layers += layer.layer.name + '_timeslice_input.array = &' + \ - inputs + '_input->array[i*' + layer.name + '_in_offset]; \n' + self.layers += ( + layer.layer.name + + "_timeslice_input.array = &" + + inputs + + "_input->array[i*" + + layer.name + + "_in_offset]; \n" + ) else: - self.layers += layer.layer.name + '_timeslice_input.array = &' + \ - inputs + '_output.array[i*' + layer.name + '_in_offset]; \n' + self.layers += ( + layer.layer.name + + "_timeslice_input.array = &" + + inputs + + "_output.array[i*" + + layer.name + + "_in_offset]; \n" + ) if outputs in self.model_outputs: - self.layers += layer.layer.name + '_timeslice_output.array = &' + \ - outputs + '_output->array[i*' + layer.name + '_out_offset]; \n' + self.layers += ( + layer.layer.name + + "_timeslice_output.array = &" + + outputs + + "_output->array[i*" + + layer.name + + "_out_offset]; \n" + ) else: - self.layers += layer.layer.name + '_timeslice_output.array = &' + \ - outputs + '_output.array[i*' + layer.name + '_out_offset]; \n' - - inp = '&' + layer.layer.name + '_timeslice' - outp = '&' + layer.layer.name + '_timeslice' - method = getattr(self, '_write_layer_' + layer_type(layer.layer)) + self.layers += ( + layer.layer.name + + "_timeslice_output.array = &" + + outputs + + "_output.array[i*" + + layer.name + + "_out_offset]; \n" + ) + + inp = "&" + layer.layer.name + "_timeslice" + outp = "&" + layer.layer.name + "_timeslice" + method = getattr(self, "_write_layer_" + layer_type(layer.layer)) method(layer.layer, inp, outp, i) - self.layers += '\n } \n' + self.layers += "\n } \n" def _write_layer_Bidirectional(self, layer, inputs, outputs, i): subname = layer.forward_layer.name - method = getattr(self, '_write_layer_' + layer_type(layer.forward_layer)) + method = getattr(self, "_write_layer_" + layer_type(layer.forward_layer)) method(layer.forward_layer, inputs, subname, i) subname = layer.backward_layer.name - method = getattr(self, '_write_layer_' + layer_type(layer.backward_layer)) + method = getattr(self, "_write_layer_" + layer_type(layer.backward_layer)) method(layer.backward_layer, inputs, subname, i) mode = layer.merge_mode inputs = [layer.forward_layer.name, layer.backward_layer.name] if layer.return_sequences: - self.layers += 'k2c_flip(&' + subname + '_output,0); \n' - if mode == 'sum': - self._write_layer_Merge(layer, inputs, outputs, 0, 'Add') - elif mode == 'mul': - self._write_layer_Merge(layer, inputs, outputs, 0, 'Multiply') - elif mode == 'ave': - self._write_layer_Merge(layer, inputs, outputs, 0, 'Average') - elif mode == 'concat': + self.layers += "k2c_flip(&" + subname + "_output,0); \n" + if mode == "sum": + self._write_layer_Merge(layer, inputs, outputs, 0, "Add") + elif mode == "mul": + self._write_layer_Merge(layer, inputs, outputs, 0, "Multiply") + elif mode == "ave": + self._write_layer_Merge(layer, inputs, outputs, 0, "Average") + elif mode == "concat": self._write_layer_Concatenate(layer, inputs, outputs, 0) def _write_layer_LSTM(self, layer, inputs, outputs, i): - nm, pnm, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - self.layers += 'k2c_lstm(' + outputs + ',' + inputs + ',' + nm + \ 
- '_state,' + pnm + '_kernel, \n\t' + pnm + \ - '_recurrent_kernel,' + pnm + '_bias,' + nm + \ - '_fwork, \n\t' + nm + '_go_backwards,' + nm + \ - '_return_sequences, \n\t' + \ - 'k2c_' + layer.get_config()['recurrent_activation'] + \ - ',' + 'k2c_' + \ - layer.get_config()['activation'] + '); \n' + nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) + self.layers += ( + "k2c_lstm(" + + outputs + + "," + + inputs + + "," + + nm + + "_state," + + pnm + + "_kernel, \n\t" + + pnm + + "_recurrent_kernel," + + pnm + + "_bias," + + nm + + "_fwork, \n\t" + + nm + + "_go_backwards," + + nm + + "_return_sequences, \n\t" + + "k2c_" + + layer.get_config()["recurrent_activation"] + + "," + + "k2c_" + + layer.get_config()["activation"] + + "); \n" + ) def _write_layer_Dense(self, layer, inputs, outputs, i): - nm, pnm, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - activation = 'k2c_' + layer.get_config()['activation'] - - self.layers += 'k2c_dense(' + outputs + ',' + inputs + ',' + pnm + \ - '_kernel, \n\t' + pnm + '_bias,' + activation + ',' + \ - nm + '_fwork); \n' + nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) + activation = "k2c_" + layer.get_config()["activation"] + + self.layers += ( + "k2c_dense(" + + outputs + + "," + + inputs + + "," + + pnm + + "_kernel, \n\t" + + pnm + + "_bias," + + activation + + "," + + nm + + "_fwork); \n" + ) def _write_layer_Conv(self, layer, inputs, outputs, i): - nm, pnm, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - activation = 'k2c_' + layer.get_config()['activation'] - if layer_type(layer)[-2:] == '1D': - fname = 'k2c_conv1d(' - elif layer_type(layer)[-2:] == '2D': - fname = 'k2c_conv2d(' - elif layer_type(layer)[-2:] == '3D': - fname = 'k2c_conv3d(' - if layer.get_config()['padding'] == 'valid': - self.layers += fname + outputs + ',' + inputs + ',' + \ - pnm + '_kernel, \n\t' + pnm + '_bias,' + nm + \ - '_stride,' + nm + '_dilation,' + activation + '); \n' + nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) + activation = "k2c_" + layer.get_config()["activation"] + if layer_type(layer)[-2:] == "1D": + fname = "k2c_conv1d(" + elif layer_type(layer)[-2:] == "2D": + fname = "k2c_conv2d(" + elif layer_type(layer)[-2:] == "3D": + fname = "k2c_conv3d(" + if layer.get_config()["padding"] == "valid": + self.layers += ( + fname + + outputs + + "," + + inputs + + "," + + pnm + + "_kernel, \n\t" + + pnm + + "_bias," + + nm + + "_stride," + + nm + + "_dilation," + + activation + + "); \n" + ) else: - self._write_layer_ZeroPad(layer, inputs, pnm + - '_padded_input', i) - self.layers += fname + outputs + ',' + pnm + \ - '_padded_input,' + pnm + '_kernel, \n\t' + \ - pnm + '_bias,' + nm + '_stride,' + nm + '_dilation,' + activation + '); \n' + self._write_layer_ZeroPad(layer, inputs, pnm + "_padded_input", i) + self.layers += ( + fname + + outputs + + "," + + pnm + + "_padded_input," + + pnm + + "_kernel, \n\t" + + pnm + + "_bias," + + nm + + "_stride," + + nm + + "_dilation," + + activation + + "); \n" + ) def _write_layer_Conv1D(self, layer, inputs, outputs, i): self._write_layer_Conv(layer, inputs, outputs, i) @@ -260,14 +343,52 @@ def _write_layer_Conv3D(self, layer, inputs, outputs, i): self._write_layer_Conv(layer, inputs, outputs, i) def _write_layer_Conv1DTranspose(self, layer, inputs, outputs, i): - nm, pnm, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - activation = 'k2c_' + layer.get_config()['activation'] + nm, pnm, inputs, 
outputs = self._format_io_names(layer, inputs, outputs) + activation = "k2c_" + layer.get_config()["activation"] # Write the conv1d_transpose layer - self.layers += 'k2c_conv1d_transpose(' + outputs + ',' + inputs + ',' + \ - pnm + '_kernel, \n\t' + pnm + '_bias,' + nm + '_stride,' + \ - nm + '_start_crop,' + activation + '); \n' + self.layers += ( + "k2c_conv1d_transpose(" + + outputs + + "," + + inputs + + "," + + pnm + + "_kernel, \n\t" + + pnm + + "_bias," + + nm + + "_stride," + + nm + + "_start_crop," + + activation + + "); \n" + ) + + def _write_layer_Conv2DTranspose(self, layer, inputs, outputs, i): + nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) + activation = "k2c_" + layer.get_config()["activation"] + + # Write the conv2d_transpose layer + self.layers += ( + "k2c_conv2d_transpose(" + + outputs + + "," + + inputs + + "," + + pnm + + "_kernel, \n\t" + + pnm + + "_bias," + + nm + + "_stride," + + nm + + "_dilation," + + nm + + "_padding," + + activation + + "); \n" + ) def _write_layer_MaxPooling1D(self, layer, inputs, outputs, i): self._write_layer_Pooling(layer, inputs, outputs, i) @@ -276,25 +397,23 @@ def _write_layer_AveragePooling1D(self, layer, inputs, outputs, i): self._write_layer_Pooling(layer, inputs, outputs, i) def _write_layer_Pooling(self, layer, inputs, outputs, i): - nm, pnm, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - if 'Max' in layer_type(layer): - s = 'k2c_maxpool' + nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) + if "Max" in layer_type(layer): + s = "k2c_maxpool" else: - s = 'k2c_avgpool' - if layer_type(layer)[-2:] == '1D': - s += '1d(' + outputs + ',' - elif layer_type(layer)[-2:] == '2D': - s += '2d(' + outputs + ',' - - if layer.get_config()['padding'] == 'valid': - s += inputs + ',' + s = "k2c_avgpool" + if layer_type(layer)[-2:] == "1D": + s += "1d(" + outputs + "," + elif layer_type(layer)[-2:] == "2D": + s += "2d(" + outputs + "," + + if layer.get_config()["padding"] == "valid": + s += inputs + "," else: - self._write_layer_ZeroPad(layer, inputs, pnm + - '_padded_input', i) - s += pnm + '_padded_input,' + self._write_layer_ZeroPad(layer, inputs, pnm + "_padded_input", i) + s += pnm + "_padded_input," - s += nm + '_pool_size, \n\t' + nm + '_stride); \n' + s += nm + "_pool_size, \n\t" + nm + "_stride); \n" self.layers += s def _write_layer_MaxPooling2D(self, layer, inputs, outputs, i): @@ -323,87 +442,137 @@ def _write_layer_GlobalAveragePooling3D(self, layer, inputs, outputs, i): def _write_layer_GlobalPooling(self, layer, inputs, outputs, i): _, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) - if 'Max' in layer_type(layer): - self.layers += 'k2c_global_max_pooling(' + if "Max" in layer_type(layer): + self.layers += "k2c_global_max_pooling(" else: - self.layers += 'k2c_global_avg_pooling(' - self.layers += outputs + ',' + inputs + '); \n' + self.layers += "k2c_global_avg_pooling(" + self.layers += outputs + "," + inputs + "); \n" def _write_layer_Add(self, layer, inputs, outputs, i): - self._write_layer_Merge(layer, inputs, outputs, i, 'Add') + self._write_layer_Merge(layer, inputs, outputs, i, "Add") def _write_layer_Subtract(self, layer, inputs, outputs, i): - self._write_layer_Merge(layer, inputs, outputs, i, 'Subtract') + self._write_layer_Merge(layer, inputs, outputs, i, "Subtract") def _write_layer_Multiply(self, layer, inputs, outputs, i): - self._write_layer_Merge(layer, inputs, outputs, i, 'Multiply') + self._write_layer_Merge(layer, inputs, 
outputs, i, "Multiply") def _write_layer_Maximum(self, layer, inputs, outputs, i): - self._write_layer_Merge(layer, inputs, outputs, i, 'Maximum') + self._write_layer_Merge(layer, inputs, outputs, i, "Maximum") def _write_layer_Minimum(self, layer, inputs, outputs, i): - self._write_layer_Merge(layer, inputs, outputs, i, 'Minimum') + self._write_layer_Merge(layer, inputs, outputs, i, "Minimum") def _write_layer_Average(self, layer, inputs, outputs, i): - self._write_layer_Merge(layer, inputs, outputs, i, 'Average') + self._write_layer_Merge(layer, inputs, outputs, i, "Average") def _write_layer_Merge(self, layer, inputs, outputs, i, mode): nm, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) - if mode == 'Subtract': - self.layers += 'k2c_subtract(' - elif mode == 'Add': - self.layers += 'k2c_add(' - elif mode == 'Multiply': - self.layers += 'k2c_multiply(' - elif mode == 'Average': - self.layers += 'k2c_average(' - elif mode == 'Maximum': - self.layers += 'k2c_max(' - elif mode == 'Minimum': - self.layers += 'k2c_min(' - self.layers += outputs + ',' + nm + '_num_tensors' + str(i) + ',' - c = ','.join(inputs) - self.layers += c + '); \n' + if mode == "Subtract": + self.layers += "k2c_subtract(" + elif mode == "Add": + self.layers += "k2c_add(" + elif mode == "Multiply": + self.layers += "k2c_multiply(" + elif mode == "Average": + self.layers += "k2c_average(" + elif mode == "Maximum": + self.layers += "k2c_max(" + elif mode == "Minimum": + self.layers += "k2c_min(" + self.layers += outputs + "," + nm + "_num_tensors" + str(i) + "," + c = ",".join(inputs) + self.layers += c + "); \n" def _write_layer_Concatenate(self, layer, inputs, outputs, i): nm, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) - self.layers += 'k2c_concatenate(' + outputs + ',' + nm + \ - '_axis' + ',' + nm + '_num_tensors' + str(i) + ',' - c = ','.join(inputs) - self.layers += c + '); \n' + self.layers += ( + "k2c_concatenate(" + + outputs + + "," + + nm + + "_axis" + + "," + + nm + + "_num_tensors" + + str(i) + + "," + ) + c = ",".join(inputs) + self.layers += c + "); \n" def _write_layer_GRU(self, layer, inputs, outputs, i): - nm, pnm, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - self.layers += 'k2c_gru(' + outputs + ',' + inputs + ',' + \ - nm + '_state,' + pnm + '_kernel, \n\t' + \ - pnm + '_recurrent_kernel,' + pnm + '_bias,' + \ - nm + '_fwork, \n\t' + nm + '_reset_after,' + \ - nm + '_go_backwards,' + nm + '_return_sequences, \n\t' + \ - 'k2c_' + layer.get_config()['recurrent_activation'] + \ - ',' + 'k2c_' + layer.get_config()['activation'] + '); \n' + nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) + self.layers += ( + "k2c_gru(" + + outputs + + "," + + inputs + + "," + + nm + + "_state," + + pnm + + "_kernel, \n\t" + + pnm + + "_recurrent_kernel," + + pnm + + "_bias," + + nm + + "_fwork, \n\t" + + nm + + "_reset_after," + + nm + + "_go_backwards," + + nm + + "_return_sequences, \n\t" + + "k2c_" + + layer.get_config()["recurrent_activation"] + + "," + + "k2c_" + + layer.get_config()["activation"] + + "); \n" + ) def _write_layer_SimpleRNN(self, layer, inputs, outputs, i): - nm, pnm, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - self.layers += 'k2c_simpleRNN(' + outputs + ',' + inputs + \ - ',' + nm + '_state,' + pnm + '_kernel, \n\t' + \ - pnm + '_recurrent_kernel,' + pnm + '_bias,' + \ - nm + '_fwork, \n\t' + nm + '_go_backwards,' + \ - nm + '_return_sequences,' + 'k2c_' + \ - 
layer.get_config()['activation'] + '); \n' + nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) + self.layers += ( + "k2c_simpleRNN(" + + outputs + + "," + + inputs + + "," + + nm + + "_state," + + pnm + + "_kernel, \n\t" + + pnm + + "_recurrent_kernel," + + pnm + + "_bias," + + nm + + "_fwork, \n\t" + + nm + + "_go_backwards," + + nm + + "_return_sequences," + + "k2c_" + + layer.get_config()["activation"] + + "); \n" + ) def _write_layer_Activation(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - activation = 'k2c_' + layer.get_config()['activation'] + layer, inputs, outputs, True + ) + activation = "k2c_" + layer.get_config()["activation"] if is_model_input: - inp = inputs + '->' + inp = inputs + "->" else: - inp = inputs[1:] + '.' - self.layers += activation + '(' + inp + 'array,' + inp + 'numel); \n' - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + inp = inputs[1:] + "." + self.layers += activation + "(" + inp + "array," + inp + "numel); \n" + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_LeakyReLU(self, layer, inputs, outputs, i): self._write_layer_AdvancedActivation(layer, inputs, outputs, i) @@ -422,110 +591,237 @@ def _write_layer_ReLU(self, layer, inputs, outputs, i): def _write_layer_AdvancedActivation(self, layer, inputs, outputs, i): nm, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) + layer, inputs, outputs, True + ) if is_model_input: - inp = inputs + '->' + inp = inputs + "->" else: - inp = inputs + '.' - - if layer_type(layer) == 'LeakyReLU': - self.layers += 'k2c_LeakyReLU(' + inp + 'array,' + \ - inp + 'numel,' + nm + '_alpha); \n' - if layer_type(layer) == 'PReLU': - self.layers += 'k2c_PReLU(' + inp + 'array,' + inp + \ - 'numel,' + nm + '_alpha.array); \n' - if layer_type(layer) == 'ELU': - self.layers += 'k2c_ELU(' + inp + 'array,' + inp + \ - 'numel,' + nm + '_alpha); \n' - if layer_type(layer) == 'ThresholdedReLU': - self.layers += 'k2c_ThresholdedReLU(' + inp + 'array,' + \ - inp + 'numel,' + nm + '_theta); \n' - if layer_type(layer) == 'ReLU': - self.layers += 'k2c_ReLU(' + inp + 'array,' + inp + \ - 'numel,' + nm + '_max_value, \n\t' + \ - nm + '_negative_slope,' + nm + '_threshold); \n' - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) - - def _write_dummy_layer(self, layer, inputs, outputs, i, is_model_input, is_model_output): + inp = inputs + "." 
+ + if layer_type(layer) == "LeakyReLU": + self.layers += ( + "k2c_LeakyReLU(" + inp + "array," + inp + "numel," + nm + "_alpha); \n" + ) + if layer_type(layer) == "PReLU": + self.layers += ( + "k2c_PReLU(" + + inp + + "array," + + inp + + "numel," + + nm + + "_alpha.array); \n" + ) + if layer_type(layer) == "ELU": + self.layers += ( + "k2c_ELU(" + inp + "array," + inp + "numel," + nm + "_alpha); \n" + ) + if layer_type(layer) == "ThresholdedReLU": + self.layers += ( + "k2c_ThresholdedReLU(" + + inp + + "array," + + inp + + "numel," + + nm + + "_theta); \n" + ) + if layer_type(layer) == "ReLU": + self.layers += ( + "k2c_ReLU(" + + inp + + "array," + + inp + + "numel," + + nm + + "_max_value, \n\t" + + nm + + "_negative_slope," + + nm + + "_threshold); \n" + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) + + def _write_dummy_layer( + self, layer, inputs, outputs, i, is_model_input, is_model_output + ): outputs = outputs.replace("&", "") inputs = inputs.replace("&", "") if is_model_input and is_model_output: - self.layers += outputs + '->ndim = ' + \ - inputs + '->ndim; // copy data into output struct \n' - self.layers += outputs + '->numel = ' + inputs + '->numel; \n' - self.layers += 'memcpy(&' + outputs + '->shape,&' + inputs + \ - '->shape,K2C_MAX_NDIM*sizeof(size_t)); \n' - self.layers += 'memcpy(' + outputs + '->array,' + inputs + '->array,' + \ - outputs + \ - '->numel*sizeof(' + outputs + '->array[0])); \n' + self.layers += ( + outputs + + "->ndim = " + + inputs + + "->ndim; // copy data into output struct \n" + ) + self.layers += outputs + "->numel = " + inputs + "->numel; \n" + self.layers += ( + "memcpy(&" + + outputs + + "->shape,&" + + inputs + + "->shape,K2C_MAX_NDIM*sizeof(size_t)); \n" + ) + self.layers += ( + "memcpy(" + + outputs + + "->array," + + inputs + + "->array," + + outputs + + "->numel*sizeof(" + + outputs + + "->array[0])); \n" + ) elif is_model_input: - self.layers += 'k2c_tensor ' + outputs + '; \n' - self.layers += outputs + '.ndim = ' + \ - inputs + '->ndim; // copy data into output struct \n' - self.layers += outputs + '.numel = ' + inputs + '->numel; \n' - self.layers += 'memcpy(' + outputs + '.shape,' + inputs + \ - '->shape,K2C_MAX_NDIM*sizeof(size_t)); \n' - self.layers += outputs + '.array = &' + inputs + \ - '->array[0]; // rename for clarity \n' + self.layers += "k2c_tensor " + outputs + "; \n" + self.layers += ( + outputs + + ".ndim = " + + inputs + + "->ndim; // copy data into output struct \n" + ) + self.layers += outputs + ".numel = " + inputs + "->numel; \n" + self.layers += ( + "memcpy(" + + outputs + + ".shape," + + inputs + + "->shape,K2C_MAX_NDIM*sizeof(size_t)); \n" + ) + self.layers += ( + outputs + ".array = &" + inputs + "->array[0]; // rename for clarity \n" + ) elif is_model_output: - self.layers += outputs + '->ndim = ' + \ - inputs + '.ndim; // copy data into output struct \n' - self.layers += outputs + '->numel = ' + inputs + '.numel; \n' - self.layers += 'memcpy(' + outputs + '->shape,' + inputs + \ - '.shape,K2C_MAX_NDIM*sizeof(size_t)); \n' - self.layers += 'memcpy(' + outputs + '->array,' + inputs + '.array,' + \ - outputs + \ - '->numel*sizeof(' + outputs + '->array[0])); \n' + self.layers += ( + outputs + + "->ndim = " + + inputs + + ".ndim; // copy data into output struct \n" + ) + self.layers += outputs + "->numel = " + inputs + ".numel; \n" + self.layers += ( + "memcpy(" + + outputs + + "->shape," + + inputs + + ".shape,K2C_MAX_NDIM*sizeof(size_t)); \n" + ) + self.layers += ( + 
"memcpy(" + + outputs + + "->array," + + inputs + + ".array," + + outputs + + "->numel*sizeof(" + + outputs + + "->array[0])); \n" + ) else: - self.layers += 'k2c_tensor ' + outputs + '; \n' - self.layers += outputs + '.ndim = ' + \ - inputs + '.ndim; // copy data into output struct \n' - self.layers += outputs + '.numel = ' + inputs + '.numel; \n' - self.layers += 'memcpy(' + outputs + '.shape,' + inputs + \ - '.shape,K2C_MAX_NDIM*sizeof(size_t)); \n' - self.layers += outputs + '.array = &' + inputs + \ - '.array[0]; // rename for clarity \n' + self.layers += "k2c_tensor " + outputs + "; \n" + self.layers += ( + outputs + + ".ndim = " + + inputs + + ".ndim; // copy data into output struct \n" + ) + self.layers += outputs + ".numel = " + inputs + ".numel; \n" + self.layers += ( + "memcpy(" + + outputs + + ".shape," + + inputs + + ".shape,K2C_MAX_NDIM*sizeof(size_t)); \n" + ) + self.layers += ( + outputs + ".array = &" + inputs + ".array[0]; // rename for clarity \n" + ) def _write_layer_Reshape(self, layer, inputs, outputs, i): nm, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self.layers += 'k2c_reshape(' + outputs + ',' + inputs + ',' + nm + \ - '_newshp,' + nm + '_newndim); \n' + layer, inputs, outputs, True + ) + self.layers += ( + "k2c_reshape(" + + outputs + + "," + + inputs + + "," + + nm + + "_newshp," + + nm + + "_newndim); \n" + ) def _write_layer_Flatten(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self.layers += 'k2c_flatten(' + outputs + ',' + inputs + '); \n' + layer, inputs, outputs, True + ) + self.layers += "k2c_flatten(" + outputs + "," + inputs + "); \n" def _write_layer_Permute(self, layer, inputs, outputs, i): nm, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) - self.layers += 'k2c_permute_dims(' + outputs + ',' + inputs + \ - ',' + nm + '_permute); \n' + self.layers += ( + "k2c_permute_dims(" + outputs + "," + inputs + "," + nm + "_permute); \n" + ) def _write_layer_RepeatVector(self, layer, inputs, outputs, i): nm, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) - self.layers += 'k2c_repeat_vector(' + outputs + ',' + inputs + \ - ',' + nm + '_n); \n' + self.layers += ( + "k2c_repeat_vector(" + outputs + "," + inputs + "," + nm + "_n); \n" + ) def _write_layer_Dot(self, layer, inputs, outputs, i): nm, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) - self.layers += 'k2c_dot(' + outputs + ',' + inputs[0] + \ - ',' + inputs[1] + ',' + nm + '_axesA,' + \ - '\n\t' + nm + '_axesB,' + nm + '_naxes,' + \ - nm + '_normalize,' + nm + '_fwork); \n' + self.layers += ( + "k2c_dot(" + + outputs + + "," + + inputs[0] + + "," + + inputs[1] + + "," + + nm + + "_axesA," + + "\n\t" + + nm + + "_axesB," + + nm + + "_naxes," + + nm + + "_normalize," + + nm + + "_fwork); \n" + ) def _write_layer_BatchNormalization(self, layer, inputs, outputs, i): - nm, pnm, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - self.layers += 'k2c_batch_norm(' + outputs + ',' + inputs + \ - ',' + pnm + '_mean,' + pnm + '_stdev,' + pnm + \ - '_gamma,' + pnm + '_beta,' + nm + '_axis); \n' + nm, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) + self.layers += ( + "k2c_batch_norm(" + + outputs + + "," + + inputs + + "," + + pnm + + "_mean," + + pnm + + "_stdev," + + pnm + + "_gamma," + + pnm + + "_beta," + + nm + + "_axis); \n" + ) def 
_write_layer_Embedding(self, layer, inputs, outputs, i): _, pnm, inputs, outputs = self._format_io_names(layer, inputs, outputs) - self.layers += 'k2c_embedding(' + outputs + ',' + inputs + \ - ',' + pnm + '_kernel); \n' + self.layers += ( + "k2c_embedding(" + outputs + "," + inputs + "," + pnm + "_kernel); \n" + ) def _write_layer_UpSampling1D(self, layer, inputs, outputs, i): self._write_layer_UpSampling(layer, inputs, outputs, i) @@ -537,15 +833,14 @@ def _write_layer_UpSampling3D(self, layer, inputs, outputs, i): self._write_layer_UpSampling(layer, inputs, outputs, i) def _write_layer_UpSampling(self, layer, inputs, outputs, i): - nm, _, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - if layer_type(layer)[-2:] == '1D': - self.layers += 'k2c_upsampling1d(' - elif layer_type(layer)[-2:] == '2D': - self.layers += 'k2c_upsampling2d(' - elif layer_type(layer)[-2:] == '3D': - self.layers += 'k2c_upsampling3d(' - self.layers += outputs + ',' + inputs + ',' + nm + '_size); \n' + nm, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) + if layer_type(layer)[-2:] == "1D": + self.layers += "k2c_upsampling1d(" + elif layer_type(layer)[-2:] == "2D": + self.layers += "k2c_upsampling2d(" + elif layer_type(layer)[-2:] == "3D": + self.layers += "k2c_upsampling3d(" + self.layers += outputs + "," + inputs + "," + nm + "_size); \n" def _write_layer_Cropping1D(self, layer, inputs, outputs, i): self._write_layer_Cropping(layer, inputs, outputs, i) @@ -557,15 +852,14 @@ def _write_layer_Cropping3D(self, layer, inputs, outputs, i): self._write_layer_Cropping(layer, inputs, outputs, i) def _write_layer_Cropping(self, layer, inputs, outputs, i): - nm, _, inputs, outputs = self._format_io_names( - layer, inputs, outputs) - if layer_type(layer)[-2:] == '1D': - self.layers += 'k2c_crop1d(' - elif layer_type(layer)[-2:] == '2D': - self.layers += 'k2c_crop2d(' - elif layer_type(layer)[-2:] == '3D': - self.layers += 'k2c_crop3d(' - self.layers += outputs + ',' + inputs + ',' + nm + '_crop); \n' + nm, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) + if layer_type(layer)[-2:] == "1D": + self.layers += "k2c_crop1d(" + elif layer_type(layer)[-2:] == "2D": + self.layers += "k2c_crop2d(" + elif layer_type(layer)[-2:] == "3D": + self.layers += "k2c_crop3d(" + self.layers += outputs + "," + inputs + "," + nm + "_crop); \n" def _write_layer_ZeroPadding1D(self, layer, inputs, outputs, i): self._write_layer_ZeroPad(layer, inputs, outputs, i) @@ -577,85 +871,106 @@ def _write_layer_ZeroPadding3D(self, layer, inputs, outputs, i): self._write_layer_ZeroPad(layer, inputs, outputs, i) def _write_layer_ZeroPad(self, layer, inputs, outputs, i): - if 'Zero' in layer_type(layer): - nm, _, inputs, outputs = self._format_io_names( - layer, inputs, outputs) + if "Zero" in layer_type(layer): + nm, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) else: nm = layer.name - if layer_type(layer)[-2:] == '1D': - self.layers += 'k2c_pad1d(' - elif layer_type(layer)[-2:] == '2D': - self.layers += 'k2c_pad2d(' - elif layer_type(layer)[-2:] == '3D': - self.layers += 'k2c_pad3d(' - self.layers += outputs + ',' + inputs + ',' + nm + \ - '_fill, \n\t' + nm + '_pad); \n' + if layer_type(layer)[-2:] == "1D": + self.layers += "k2c_pad1d(" + elif layer_type(layer)[-2:] == "2D": + self.layers += "k2c_pad2d(" + elif layer_type(layer)[-2:] == "3D": + self.layers += "k2c_pad3d(" + self.layers += ( + outputs + "," + inputs + "," + nm + "_fill, \n\t" + nm + "_pad); \n" + ) def 
_write_layer_Dropout(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + layer, inputs, outputs, True + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_SpatialDropout1D(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + layer, inputs, outputs, True + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_SpatialDropout2D(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + layer, inputs, outputs, True + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_SpatialDropout3D(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + layer, inputs, outputs, True + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_ActivityRegularization(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + layer, inputs, outputs, True + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_GaussianNoise(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + layer, inputs, outputs, True + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_GaussianDropout(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + layer, inputs, outputs, True + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_AlphaDropout(self, layer, inputs, outputs, i): _, _, inputs, outputs, is_model_input, is_model_output = self._format_io_names( - layer, inputs, outputs, True) - self._write_dummy_layer(layer, inputs, outputs, i, - is_model_input, is_model_output) + layer, inputs, outputs, True + ) + self._write_dummy_layer( + layer, inputs, outputs, i, is_model_input, is_model_output + ) def _write_layer_Input(self, layer, inputs, outputs, i): - self.layers += '' + self.layers += "" def _write_layer_InputLayer(self, layer, inputs, outputs, i): - self.layers += '' + self.layers += "" def _write_layer_TFOpLambda(self, layer, inputs, outputs, i): self._write_layer_TensorFlowOpLayer(layer, inputs, outputs, i) def _write_layer_TensorFlowOpLayer(self, layer, inputs, outputs, i): - if 'split' in 
layer.name: - _, _, inputs, outputs = self._format_io_names( - layer, inputs, outputs) + if "split" in layer.name: + _, _, inputs, outputs = self._format_io_names(layer, inputs, outputs) offset = 0 for j, outp in enumerate(outputs): - self.layers += 'k2c_split(' + outp + ',' + inputs + ',' + str(offset) + '); \n' + self.layers += ( + "k2c_split(" + outp + "," + inputs + "," + str(offset) + "); \n" + ) offset += layer.get_output_at(i)[j].shape[-1] else: - raise AssertionError('Unsupported TensorFlowOpLayer: ' + layer.name + '\n' - + 'Currently only split operation is supported.') + raise AssertionError( + "Unsupported TensorFlowOpLayer: " + + layer.name + + "\n" + + "Currently only split operation is supported." + ) diff --git a/keras2c/weights2c.py b/keras2c/weights2c.py index 1152cbe..785540e 100644 --- a/keras2c/weights2c.py +++ b/keras2c/weights2c.py @@ -9,9 +9,9 @@ # imports import numpy as np -from keras2c.io_parsing import layer_type, get_layer_io_names, get_model_io_names import tensorflow as tf +from keras2c.io_parsing import get_layer_io_names, get_model_io_names, layer_type maxndim = 5 @@ -27,7 +27,7 @@ __email__ = "guptaa@fusion.gat.com" -class Weights2C(): +class Weights2C: """Creates an object to extract and write weights and other model parameters Args: @@ -37,12 +37,11 @@ class Weights2C(): """ def __init__(self, model, function_name, malloc=False): - self.model = model self.function_name = function_name self.model_io = get_model_io_names(self.model) self.malloc = malloc - self.stack_vars = '' + self.stack_vars = "" self.malloc_vars = {} self.static_vars = {} @@ -58,41 +57,59 @@ def array2c(array, name, malloc=False): Returns: arr (str): generated code for the array as a k2c_tensor """ - temp = array.flatten(order='C') + temp = array.flatten(order="C") size = array.size shp = array.shape ndim = len(shp) - shp = np.concatenate((shp, np.ones(maxndim-ndim))) + shp = np.concatenate((shp, np.ones(maxndim - ndim))) if malloc: to_malloc = {} - s = 'k2c_tensor ' + name + ' = {' + name + \ - '_array,' + str(int(ndim)) + ',' + str(int(size)) + ',{' + \ - np.array2string(shp.astype(int), separator=',')[ - 1:-1] + '}}; \n' - to_malloc.update({name + '_array': temp}) + s = ( + "k2c_tensor " + + name + + " = {" + + name + + "_array," + + str(int(ndim)) + + "," + + str(int(size)) + + ",{" + + np.array2string(shp.astype(int), separator=",")[1:-1] + + "}}; \n" + ) + to_malloc.update({name + "_array": temp}) return s, to_malloc else: count = 0 - s = 'float ' + name + '_array[' + str(size) + '] = ' + s = "float " + name + "_array[" + str(size) + "] = " if np.max(np.abs(temp)) < 1e-16: - s += '{' + str(0) + '}; \n' + s += "{" + str(0) + "}; \n" else: - s += '{\n' + s += "{\n" for i in range(size): if temp[i] == np.inf: s += "HUGE_VALF," elif temp[i] == -np.inf: s += "-HUGE_VALF," else: - s += "{:+.8e}f".format(temp[i]) + ',' + s += "{:+.8e}f".format(temp[i]) + "," count += 1 if (count) % 5 == 0: - s += '\n' - s += '}; \n' - s += 'k2c_tensor ' + name + ' = {&' + name + \ - '_array[0],' + str(int(ndim)) + ',' + str(int(size)) + ',{' + \ - np.array2string(shp.astype(int), separator=',')[ - 1:-1] + '}}; \n' + s += "\n" + s += "}; \n" + s += ( + "k2c_tensor " + + name + + " = {&" + + name + + "_array[0]," + + str(int(ndim)) + + "," + + str(int(size)) + + ",{" + + np.array2string(shp.astype(int), separator=",")[1:-1] + + "}}; \n" + ) return s def _write_weights_array2c(self, array, name): @@ -104,7 +121,7 @@ def _write_weights_array2c(self, array, name): self.stack_vars += temp def 
_write_weights_layer(self, layer, **kwargs): - method = getattr(self, '_write_weights_' + layer_type(layer)) + method = getattr(self, "_write_weights_" + layer_type(layer)) return method(layer, **kwargs) def write_weights(self, verbose=True): @@ -123,19 +140,19 @@ def write_weights(self, verbose=True): (eg, states of a stateful RNN) """ for layer in self.model.layers: - method = getattr(self, '_write_weights_' + layer_type(layer)) + method = getattr(self, "_write_weights_" + layer_type(layer)) method(layer) return self.stack_vars, self.malloc_vars, self._write_static_vars() def _write_static_vars(self): if len(self.static_vars) > 0: - s = 'static struct ' + self.function_name + '_static_vars \n' - s += '{ \n' + s = "static struct " + self.function_name + "_static_vars \n" + s += "{ \n" for k, v in self.static_vars.items(): - s += 'float ' + k + '[' + str(v) + ']; \n' - s += '} ' + self.function_name + '_states; \n' + s += "float " + k + "[" + str(v) + "]; \n" + s += "} " + self.function_name + "_states; \n" else: - s = '' + s = "" return s def _write_outputs(self, layer): @@ -147,19 +164,18 @@ def _write_outputs(self, layer): outshp = layer.get_output_at(i)[j].shape[1:] if outpp not in self.model_io[1]: self._write_weights_array2c( - np.zeros(outshp), outpp + '_output') + np.zeros(outshp), outpp + "_output" + ) else: outshp = layer.output.shape[1:] if outp not in self.model_io[1]: - self._write_weights_array2c( - np.zeros(outshp), outp + '_output') + self._write_weights_array2c(np.zeros(outshp), outp + "_output") else: outshp = layer.output_shape[1:] if outputs[0] not in self.model_io[1]: # self._write_weights_array2c( # np.zeros(outshp), outputs[0] + '_output') - self._write_weights_array2c( - np.zeros(outshp), layer.name + '_output') + self._write_weights_array2c(np.zeros(outshp), layer.name + "_output") def _write_weights_Bidirectional(self, layer): try: @@ -191,31 +207,45 @@ def _write_weights_Bidirectional(self, layer): self._write_weights_layer(layer.backward_layer, skip_outputs=True) outshp = layer.backward_layer.output.shape[1:] - self._write_weights_array2c(np.zeros(outshp), layer.backward_layer.name + '_output') + self._write_weights_array2c( + np.zeros(outshp), layer.backward_layer.name + "_output" + ) self._write_weights_layer(layer.forward_layer, skip_outputs=True) outshp = layer.forward_layer.output.shape[1:] - self._write_weights_array2c(np.zeros(outshp), layer.forward_layer.name + '_output') + self._write_weights_array2c( + np.zeros(outshp), layer.forward_layer.name + "_output" + ) if layer.merge_mode: self._write_outputs(layer) - self.stack_vars += 'size_t ' + layer.name + '_num_tensors' + str(0) + \ - ' = ' + str(2) + '; \n' - if layer.merge_mode == 'concat': + self.stack_vars += ( + "size_t " + + layer.name + + "_num_tensors" + + str(0) + + " = " + + str(2) + + "; \n" + ) + if layer.merge_mode == "concat": if layer.return_sequences: ax = 1 else: ax = 0 - self.stack_vars += 'size_t ' + layer.name + '_axis = ' +\ - str(ax) + '; \n' + self.stack_vars += ( + "size_t " + layer.name + "_axis = " + str(ax) + "; \n" + ) else: output_names = get_layer_io_names(layer)[1][0] subname = layer.forward_layer.name - self.stack_vars += 'k2c_tensor * ' + \ - output_names[0] + ' = ' + subname + '_output; \n' + self.stack_vars += ( + "k2c_tensor * " + output_names[0] + " = " + subname + "_output; \n" + ) subname = layer.backward_layer.name - self.stack_vars += 'k2c_tensor * ' + \ - output_names[1] + ' = ' + subname + '_output; \n' + self.stack_vars += ( + "k2c_tensor * " + output_names[1] + 
" = " + subname + "_output; \n" + ) def _write_weights_TimeDistributed(self, layer, skip_outputs=False): if not skip_outputs: @@ -223,41 +253,55 @@ def _write_weights_TimeDistributed(self, layer, skip_outputs=False): try: foo = layer.layer.input.shape except: - temp_input = tf.keras.layers.Input( - layer.input.shape[2:], batch_size=1) + temp_input = tf.keras.layers.Input(layer.input.shape[2:], batch_size=1) foo = layer.layer.__call__(temp_input) self._write_weights_layer(layer.layer) timeslice_input = np.squeeze(np.zeros(layer.layer.input.shape[1:])) timeslice_output = np.squeeze(np.zeros(layer.layer.output.shape[1:])) self._write_weights_array2c( - timeslice_input, layer.layer.name + '_timeslice_input') + timeslice_input, layer.layer.name + "_timeslice_input" + ) self._write_weights_array2c( - timeslice_output, layer.layer.name + '_timeslice_output') - self.stack_vars += 'const size_t ' + layer.name +\ - '_timesteps = ' + str(layer.input.shape[1]) + '; \n' - self.stack_vars += 'const size_t ' + layer.name +\ - '_in_offset = ' + \ - str(np.prod(layer.input.shape[2:])) + '; \n' - self.stack_vars += 'const size_t ' + layer.name +\ - '_out_offset = ' + \ - str(np.prod(layer.output.shape[2:])) + '; \n' + timeslice_output, layer.layer.name + "_timeslice_output" + ) + self.stack_vars += ( + "const size_t " + + layer.name + + "_timesteps = " + + str(layer.input.shape[1]) + + "; \n" + ) + self.stack_vars += ( + "const size_t " + + layer.name + + "_in_offset = " + + str(np.prod(layer.input.shape[2:])) + + "; \n" + ) + self.stack_vars += ( + "const size_t " + + layer.name + + "_out_offset = " + + str(np.prod(layer.output.shape[2:])) + + "; \n" + ) def _write_weights_Input(self, layer): - self.stack_vars += '' + self.stack_vars += "" def _write_weights_InputLayer(self, layer): - self.stack_vars += '' + self.stack_vars += "" def _write_weights_BatchNormalization(self, layer, skip_outputs=False): cfg = layer.get_config() - center = cfg['center'] - scale = cfg['scale'] - if isinstance(cfg['axis'], (list, tuple, np.ndarray)): - axis = cfg['axis'][0]-1 + center = cfg["center"] + scale = cfg["scale"] + if isinstance(cfg["axis"], (list, tuple, np.ndarray)): + axis = cfg["axis"][0] - 1 else: - axis = cfg['axis']-1 + axis = cfg["axis"] - 1 - epsilon = cfg['epsilon'] + epsilon = cfg["epsilon"] if center and scale: gamma = layer.get_weights()[0] @@ -283,128 +327,185 @@ def _write_weights_BatchNormalization(self, layer, skip_outputs=False): stdev = np.sqrt(variance + epsilon) if not skip_outputs: self._write_outputs(layer) - self.stack_vars += 'size_t ' + layer.name + \ - '_axis = ' + str(axis) + '; \n' - self._write_weights_array2c(mean, layer.name + '_mean') - self._write_weights_array2c(stdev, layer.name + '_stdev') - self._write_weights_array2c(gamma, layer.name + '_gamma') - self._write_weights_array2c(beta, layer.name + '_beta') - self.stack_vars += '\n\n' + self.stack_vars += "size_t " + layer.name + "_axis = " + str(axis) + "; \n" + self._write_weights_array2c(mean, layer.name + "_mean") + self._write_weights_array2c(stdev, layer.name + "_stdev") + self._write_weights_array2c(gamma, layer.name + "_gamma") + self._write_weights_array2c(beta, layer.name + "_beta") + self.stack_vars += "\n\n" def _write_weights_LSTM(self, layer, skip_outputs=False): cfg = layer.get_config() - units = cfg['units'] + units = cfg["units"] if not skip_outputs: self._write_outputs(layer) - self.stack_vars += 'float ' + layer.name + \ - '_fwork[' + str(8*units) + '] = {0}; \n' - self.stack_vars += 'int ' + layer.name + 
'_go_backwards = ' + \ - str(int(cfg['go_backwards'])) + ';\n' - self.stack_vars += 'int ' + layer.name + '_return_sequences = ' + \ - str(int(cfg['return_sequences'])) + ';\n' - if cfg['stateful']: - self.static_vars.update({layer.name + '_state': 2*units}) - self.stack_vars += 'float * ' + layer.name + '_state = ' + \ - self.function_name + '_states.' + \ - layer.name + '_state; \n' + self.stack_vars += ( + "float " + layer.name + "_fwork[" + str(8 * units) + "] = {0}; \n" + ) + self.stack_vars += ( + "int " + + layer.name + + "_go_backwards = " + + str(int(cfg["go_backwards"])) + + ";\n" + ) + self.stack_vars += ( + "int " + + layer.name + + "_return_sequences = " + + str(int(cfg["return_sequences"])) + + ";\n" + ) + if cfg["stateful"]: + self.static_vars.update({layer.name + "_state": 2 * units}) + self.stack_vars += ( + "float * " + + layer.name + + "_state = " + + self.function_name + + "_states." + + layer.name + + "_state; \n" + ) else: - self.stack_vars += 'float ' + layer.name + \ - '_state[' + str(2*units) + '] = {0}; \n' + self.stack_vars += ( + "float " + layer.name + "_state[" + str(2 * units) + "] = {0}; \n" + ) weights = layer.get_weights() kernel = weights[0] recurrent_kernel = weights[1] - if cfg['use_bias']: + if cfg["use_bias"]: bias = weights[2] else: - bias = np.zeros(4*units) + bias = np.zeros(4 * units) ckernel = np.concatenate(np.split(kernel, 4, axis=1), axis=0) crecurrent_kernel = np.concatenate( - np.split(recurrent_kernel, 4, axis=1), axis=0) - self._write_weights_array2c(ckernel, layer.name + '_kernel') - self._write_weights_array2c( - crecurrent_kernel, layer.name + '_recurrent_kernel') - self._write_weights_array2c(bias, layer.name + '_bias') - self.stack_vars += '\n \n' + np.split(recurrent_kernel, 4, axis=1), axis=0 + ) + self._write_weights_array2c(ckernel, layer.name + "_kernel") + self._write_weights_array2c(crecurrent_kernel, layer.name + "_recurrent_kernel") + self._write_weights_array2c(bias, layer.name + "_bias") + self.stack_vars += "\n \n" def _write_weights_GRU(self, layer, skip_outputs=False): cfg = layer.get_config() - units = cfg['units'] + units = cfg["units"] if not skip_outputs: self._write_outputs(layer) - self.stack_vars += 'float ' + layer.name + \ - '_fwork[' + str(6*units) + '] = {0}; \n' - self.stack_vars += 'int ' + layer.name + '_reset_after = ' + \ - str(int(cfg['reset_after'])) + ';\n' - self.stack_vars += 'int ' + layer.name + '_go_backwards = ' + \ - str(int(cfg['go_backwards'])) + ';\n' - self.stack_vars += 'int ' + layer.name + '_return_sequences = ' + \ - str(int(cfg['return_sequences'])) + ';\n' - if cfg['stateful']: - self.static_vars.update({layer.name + '_state': units}) - self.stack_vars += 'float * ' + layer.name + '_state = ' + \ - self.function_name + '_states.' + \ - layer.name + '_state; \n' + self.stack_vars += ( + "float " + layer.name + "_fwork[" + str(6 * units) + "] = {0}; \n" + ) + self.stack_vars += ( + "int " + + layer.name + + "_reset_after = " + + str(int(cfg["reset_after"])) + + ";\n" + ) + self.stack_vars += ( + "int " + + layer.name + + "_go_backwards = " + + str(int(cfg["go_backwards"])) + + ";\n" + ) + self.stack_vars += ( + "int " + + layer.name + + "_return_sequences = " + + str(int(cfg["return_sequences"])) + + ";\n" + ) + if cfg["stateful"]: + self.static_vars.update({layer.name + "_state": units}) + self.stack_vars += ( + "float * " + + layer.name + + "_state = " + + self.function_name + + "_states." 
+ + layer.name + + "_state; \n" + ) else: - self.stack_vars += 'float ' + layer.name + \ - '_state[' + str(units) + '] = {0}; \n' + self.stack_vars += ( + "float " + layer.name + "_state[" + str(units) + "] = {0}; \n" + ) weights = layer.get_weights() kernel = weights[0] recurrent_kernel = weights[1] - if cfg['use_bias']: + if cfg["use_bias"]: bias = weights[2] - if cfg['reset_after']: + if cfg["reset_after"]: rbias = bias[1] bias = bias[0] else: bias = bias - rbias = np.zeros(3*units) + rbias = np.zeros(3 * units) else: - bias = np.zeros(3*units) - rbias = np.zeros(3*units) + bias = np.zeros(3 * units) + rbias = np.zeros(3 * units) cbias = np.concatenate([bias, rbias], axis=0) ckernel = np.concatenate(np.split(kernel, 3, axis=1), axis=0) crecurrent_kernel = np.concatenate( - np.split(recurrent_kernel, 3, axis=1), axis=0) - self._write_weights_array2c(ckernel, layer.name + '_kernel') - self._write_weights_array2c(crecurrent_kernel, layer.name + - '_recurrent_kernel') - self._write_weights_array2c(cbias, layer.name + '_bias') - self.stack_vars += '\n \n' + np.split(recurrent_kernel, 3, axis=1), axis=0 + ) + self._write_weights_array2c(ckernel, layer.name + "_kernel") + self._write_weights_array2c(crecurrent_kernel, layer.name + "_recurrent_kernel") + self._write_weights_array2c(cbias, layer.name + "_bias") + self.stack_vars += "\n \n" def _write_weights_SimpleRNN(self, layer, skip_outputs=False): cfg = layer.get_config() - units = cfg['units'] + units = cfg["units"] if not skip_outputs: self._write_outputs(layer) - self.stack_vars += 'int ' + layer.name + '_go_backwards = ' + \ - str(int(cfg['go_backwards'])) + ';\n' - self.stack_vars += 'int ' + layer.name + '_return_sequences = ' + \ - str(int(cfg['return_sequences'])) + ';\n' - self.stack_vars += 'float ' + layer.name + \ - '_fwork[' + str(2*units) + '] = {0}; \n' - if cfg['stateful']: - self.static_vars.update({layer.name + '_state': units}) - self.stack_vars += 'float * ' + layer.name + '_state = ' + \ - self.function_name + '_states.' + \ - layer.name + '_state; \n' + self.stack_vars += ( + "int " + + layer.name + + "_go_backwards = " + + str(int(cfg["go_backwards"])) + + ";\n" + ) + self.stack_vars += ( + "int " + + layer.name + + "_return_sequences = " + + str(int(cfg["return_sequences"])) + + ";\n" + ) + self.stack_vars += ( + "float " + layer.name + "_fwork[" + str(2 * units) + "] = {0}; \n" + ) + if cfg["stateful"]: + self.static_vars.update({layer.name + "_state": units}) + self.stack_vars += ( + "float * " + + layer.name + + "_state = " + + self.function_name + + "_states." 
+ + layer.name + + "_state; \n" + ) else: - self.stack_vars += 'float ' + layer.name + \ - '_state[' + str(units) + '] = {0}; \n' + self.stack_vars += ( + "float " + layer.name + "_state[" + str(units) + "] = {0}; \n" + ) weights = layer.get_weights() kernel = weights[0] recurrent_kernel = weights[1] - if cfg['use_bias']: + if cfg["use_bias"]: bias = weights[2] else: bias = np.zeros(units) - self._write_weights_array2c(kernel, layer.name + '_kernel') - self._write_weights_array2c(recurrent_kernel, layer.name + - '_recurrent_kernel') - self._write_weights_array2c(bias, layer.name + '_bias') - self.stack_vars += '\n \n' + self._write_weights_array2c(kernel, layer.name + "_kernel") + self._write_weights_array2c(recurrent_kernel, layer.name + "_recurrent_kernel") + self._write_weights_array2c(bias, layer.name + "_bias") + self.stack_vars += "\n \n" def _write_weights_Dense(self, layer, skip_outputs=False): cfg = layer.get_config() @@ -412,168 +513,218 @@ def _write_weights_Dense(self, layer, skip_outputs=False): self._write_outputs(layer) weights = layer.get_weights() A = weights[0] - if cfg['use_bias']: + if cfg["use_bias"]: b = weights[1] else: b = np.zeros(A.shape[1]) - self._write_weights_array2c(A, layer.name + '_kernel') - self._write_weights_array2c(b, layer.name + '_bias') - self.stack_vars += 'float ' + layer.name + \ - '_fwork[' + str(np.prod(layer.input.shape[1:]) + - np.prod(A.shape)) + '] = {0}; \n' - self.stack_vars += '\n \n' + self._write_weights_array2c(A, layer.name + "_kernel") + self._write_weights_array2c(b, layer.name + "_bias") + self.stack_vars += ( + "float " + + layer.name + + "_fwork[" + + str(np.prod(layer.input.shape[1:]) + np.prod(A.shape)) + + "] = {0}; \n" + ) + self.stack_vars += "\n \n" def _write_weights_Conv1D(self, layer, skip_outputs=False): cfg = layer.get_config() - padding = cfg['padding'] - stride = cfg['strides'][0] - dilation = cfg['dilation_rate'][0] - kernel_size = cfg['kernel_size'][0] - self.stack_vars += 'size_t ' + layer.name + \ - '_stride = ' + str(stride) + '; \n' - self.stack_vars += 'size_t ' + layer.name + \ - '_dilation = ' + str(dilation) + '; \n' + padding = cfg["padding"] + stride = cfg["strides"][0] + dilation = cfg["dilation_rate"][0] + kernel_size = cfg["kernel_size"][0] + self.stack_vars += "size_t " + layer.name + "_stride = " + str(stride) + "; \n" + self.stack_vars += ( + "size_t " + layer.name + "_dilation = " + str(dilation) + "; \n" + ) if not skip_outputs: self._write_outputs(layer) inshp = layer.input.shape[1:] - if padding == 'causal': - pad_along_height = dilation*(kernel_size-1) + if padding == "causal": + pad_along_height = dilation * (kernel_size - 1) pad_top = pad_along_height pad_bottom = 0 - self._write_weights_array2c(np.zeros((inshp[0]+pad_top+pad_bottom, inshp[1])), - layer.name + '_padded_input') - self.stack_vars += 'size_t ' + layer.name + '_pad[2] = {' + str(pad_top) + ','\ - + str(pad_bottom) + '}; \n' - self.stack_vars += 'float ' + layer.name + '_fill = 0.0f; \n' - elif padding == 'same': - pad_along_height = dilation*(kernel_size-1) + self._write_weights_array2c( + np.zeros((inshp[0] + pad_top + pad_bottom, inshp[1])), + layer.name + "_padded_input", + ) + self.stack_vars += ( + "size_t " + + layer.name + + "_pad[2] = {" + + str(pad_top) + + "," + + str(pad_bottom) + + "}; \n" + ) + self.stack_vars += "float " + layer.name + "_fill = 0.0f; \n" + elif padding == "same": + pad_along_height = dilation * (kernel_size - 1) pad_top = int(pad_along_height // 2) pad_bottom = int(pad_along_height - pad_top) - 
self._write_weights_array2c(np.zeros((inshp[0]+pad_top+pad_bottom, inshp[1])), - layer.name + '_padded_input') - self.stack_vars += 'size_t ' + layer.name + '_pad[2] = {' + str(pad_top) + ','\ - + str(pad_bottom) + '}; \n' + self._write_weights_array2c( + np.zeros((inshp[0] + pad_top + pad_bottom, inshp[1])), + layer.name + "_padded_input", + ) + self.stack_vars += ( + "size_t " + + layer.name + + "_pad[2] = {" + + str(pad_top) + + "," + + str(pad_bottom) + + "}; \n" + ) self.stack_vars += "float " + layer.name + "_fill = 0.0f; \n" weights = layer.get_weights() kernel = weights[0] - if cfg['use_bias']: + if cfg["use_bias"]: bias = weights[1] else: bias = np.zeros(kernel.shape[2]) - self._write_weights_array2c(kernel, layer.name + '_kernel') - self._write_weights_array2c(bias, layer.name + '_bias') - self.stack_vars += '\n \n' + self._write_weights_array2c(kernel, layer.name + "_kernel") + self._write_weights_array2c(bias, layer.name + "_bias") + self.stack_vars += "\n \n" def _write_weights_Conv2D(self, layer, skip_outputs=False): cfg = layer.get_config() - padding = cfg['padding'] - stride = cfg['strides'] - dilation = cfg['dilation_rate'] - kernel_size = cfg['kernel_size'] - self.stack_vars += 'size_t ' + layer.name + \ - '_stride[2] = {' + ','.join([str(i) for i in stride]) + '}; \n' - self.stack_vars += 'size_t ' + layer.name + \ - '_dilation[2] = {' + ','.join([str(i) - for i in dilation]) + '}; \n' + padding = cfg["padding"] + stride = cfg["strides"] + dilation = cfg["dilation_rate"] + kernel_size = cfg["kernel_size"] + self.stack_vars += ( + "size_t " + + layer.name + + "_stride[2] = {" + + ",".join([str(i) for i in stride]) + + "}; \n" + ) + self.stack_vars += ( + "size_t " + + layer.name + + "_dilation[2] = {" + + ",".join([str(i) for i in dilation]) + + "}; \n" + ) if not skip_outputs: self._write_outputs(layer) - if padding == 'same': + if padding == "same": inshp = layer.input.shape[1:] - pad_along_height = dilation[0]*(kernel_size[0]-1) + pad_along_height = dilation[0] * (kernel_size[0] - 1) pad_top = int(pad_along_height // 2) pad_bottom = int(pad_along_height - pad_top) - pad_along_width = dilation[1]*(kernel_size[1]-1) - pad_left = pad_along_width//2 + pad_along_width = dilation[1] * (kernel_size[1] - 1) + pad_left = pad_along_width // 2 pad_right = pad_along_width - pad_left - padshp = (inshp[0]+pad_along_height, - inshp[1]+pad_along_width, inshp[2]) + padshp = (inshp[0] + pad_along_height, inshp[1] + pad_along_width, inshp[2]) pad = [pad_top, pad_bottom, pad_left, pad_right] - self._write_weights_array2c(np.zeros(padshp), layer.name + - '_padded_input') - self.stack_vars += 'size_t ' + layer.name + \ - '_pad[4] = {' + ','.join([str(i) for i in pad]) + '}; \n' - self.stack_vars += 'float ' + layer.name + '_fill = 0.0f; \n' + self._write_weights_array2c(np.zeros(padshp), layer.name + "_padded_input") + self.stack_vars += ( + "size_t " + + layer.name + + "_pad[4] = {" + + ",".join([str(i) for i in pad]) + + "}; \n" + ) + self.stack_vars += "float " + layer.name + "_fill = 0.0f; \n" weights = layer.get_weights() kernel = weights[0] - if cfg['use_bias']: + if cfg["use_bias"]: bias = weights[1] else: bias = np.zeros(kernel.shape[3]) - self._write_weights_array2c(kernel, layer.name + '_kernel') - self._write_weights_array2c(bias, layer.name + '_bias') - self.stack_vars += '\n \n' + self._write_weights_array2c(kernel, layer.name + "_kernel") + self._write_weights_array2c(bias, layer.name + "_bias") + self.stack_vars += "\n \n" def _write_weights_Conv3D(self, layer, 
skip_outputs=False): cfg = layer.get_config() - padding = cfg['padding'] - stride = cfg['strides'] - dilation = cfg['dilation_rate'] - kernel_size = cfg['kernel_size'] - self.stack_vars += 'size_t ' + layer.name + \ - '_stride[3] = {' + ','.join([str(i) for i in stride]) + '}; \n' - self.stack_vars += 'size_t ' + layer.name + \ - '_dilation[3] = {' + ','.join([str(i) - for i in dilation]) + '}; \n' + padding = cfg["padding"] + stride = cfg["strides"] + dilation = cfg["dilation_rate"] + kernel_size = cfg["kernel_size"] + self.stack_vars += ( + "size_t " + + layer.name + + "_stride[3] = {" + + ",".join([str(i) for i in stride]) + + "}; \n" + ) + self.stack_vars += ( + "size_t " + + layer.name + + "_dilation[3] = {" + + ",".join([str(i) for i in dilation]) + + "}; \n" + ) if not skip_outputs: self._write_outputs(layer) - if padding == 'same': + if padding == "same": inshp = layer.input.shape[1:] - pad_along_height = dilation[0]*(kernel_size[0]-1) + pad_along_height = dilation[0] * (kernel_size[0] - 1) pad_top = int(pad_along_height // 2) pad_bottom = int(pad_along_height - pad_top) - pad_along_width = dilation[1]*(kernel_size[1]-1) - pad_left = pad_along_width//2 + pad_along_width = dilation[1] * (kernel_size[1] - 1) + pad_left = pad_along_width // 2 pad_right = pad_along_width - pad_left - pad_along_depth = dilation[1]*(kernel_size[1]-1) - pad_front = pad_along_depth//2 + pad_along_depth = dilation[1] * (kernel_size[1] - 1) + pad_front = pad_along_depth // 2 pad_back = pad_along_depth - pad_front - padshp = (inshp[0]+pad_along_height, - inshp[1]+pad_along_width, - inshp[2]+pad_along_depth, - inshp[3]) - pad = [pad_top, pad_bottom, pad_left, - pad_right, pad_front, pad_back] - self._write_weights_array2c(np.zeros(padshp), layer.name + - '_padded_input') - self.stack_vars += 'size_t ' + layer.name + \ - '_pad[6] = {' + ','.join([str(i) for i in pad]) + '}; \n' - self.stack_vars += 'float ' + layer.name + '_fill = 0.0f; \n' + padshp = ( + inshp[0] + pad_along_height, + inshp[1] + pad_along_width, + inshp[2] + pad_along_depth, + inshp[3], + ) + pad = [pad_top, pad_bottom, pad_left, pad_right, pad_front, pad_back] + self._write_weights_array2c(np.zeros(padshp), layer.name + "_padded_input") + self.stack_vars += ( + "size_t " + + layer.name + + "_pad[6] = {" + + ",".join([str(i) for i in pad]) + + "}; \n" + ) + self.stack_vars += "float " + layer.name + "_fill = 0.0f; \n" weights = layer.get_weights() kernel = weights[0] - if cfg['use_bias']: + if cfg["use_bias"]: bias = weights[1] else: bias = np.zeros(kernel.shape[3]) - self._write_weights_array2c(kernel, layer.name + '_kernel') - self._write_weights_array2c(bias, layer.name + '_bias') - self.stack_vars += '\n \n' + self._write_weights_array2c(kernel, layer.name + "_kernel") + self._write_weights_array2c(bias, layer.name + "_bias") + self.stack_vars += "\n \n" def _write_weights_Conv1DTranspose(self, layer, skip_outputs=False): cfg = layer.get_config() - padding = cfg['padding'] - stride = cfg['strides'][0] - dilation = cfg['dilation_rate'][0] + padding = cfg["padding"] + stride = cfg["strides"][0] + dilation = cfg["dilation_rate"][0] if dilation != 1: - raise ValueError('Dilation not supported for Conv1DTranspose') - kernel_size = cfg['kernel_size'][0] + raise ValueError("Dilation not supported for Conv1DTranspose") + kernel_size = cfg["kernel_size"][0] # Write stride to C - self.stack_vars += 'size_t ' + layer.name + \ - '_stride = ' + str(stride) + '; \n' + self.stack_vars += "size_t " + layer.name + "_stride = " + str(stride) + "; \n" - if 
padding == 'valid': + if padding == "valid": start_crop = 0 - elif padding == 'same': + elif padding == "same": start_crop = (kernel_size - stride) // 2 else: - raise ValueError('Only same and valid padding supported for Conv1DTranspose') + raise ValueError( + "Only same and valid padding supported for Conv1DTranspose" + ) # Write start_crop to C - self.stack_vars += 'size_t ' + layer.name + \ - '_start_crop = ' + str(start_crop) + '; \n' + self.stack_vars += ( + "size_t " + layer.name + "_start_crop = " + str(start_crop) + "; \n" + ) # Initialize layer.name + '_output' if not skip_outputs: @@ -582,13 +733,82 @@ def _write_weights_Conv1DTranspose(self, layer, skip_outputs=False): # Write kernel and bias to C weights = layer.get_weights() kernel = weights[0] - if cfg['use_bias']: + if cfg["use_bias"]: bias = weights[1] else: bias = np.zeros(kernel.shape[1]) - self._write_weights_array2c(kernel, layer.name + '_kernel') - self._write_weights_array2c(bias, layer.name + '_bias') - self.stack_vars += '\n \n' + self._write_weights_array2c(kernel, layer.name + "_kernel") + self._write_weights_array2c(bias, layer.name + "_bias") + self.stack_vars += "\n \n" + + def _write_weights_Conv2DTranspose(self, layer, skip_outputs=False): + cfg = layer.get_config() + padding = cfg["padding"] + stride = cfg["strides"] + dilation = cfg["dilation_rate"] + if dilation[0] != 1 or dilation[1] != 1: + raise ValueError("Dilation not supported for Conv2DTranspose") + kernel_size = cfg["kernel_size"] + + # Write stride to C + self.stack_vars += ( + "size_t " + + layer.name + + "_stride[2] = {" + + ",".join([str(i) for i in stride]) + + "}; \n" + ) + + # Write dilation to C + self.stack_vars += ( + "size_t " + + layer.name + + "_dilation[2] = {" + + ",".join([str(i) for i in dilation]) + + "}; \n" + ) + + # Calculate padding (crop amounts) based on padding mode + if padding == "valid": + crop_h = 0 + crop_w = 0 + elif padding == "same": + crop_h = (kernel_size[0] - stride[0]) // 2 + crop_w = (kernel_size[1] - stride[1]) // 2 + else: + raise ValueError( + "Only same and valid padding supported for Conv2DTranspose" + ) + + # Write padding (crop) to C + self.stack_vars += ( + "size_t " + + layer.name + + "_padding[2] = {" + + str(crop_h) + + "," + + str(crop_w) + + "}; \n" + ) + + # Initialize layer.name + '_output' + if not skip_outputs: + self._write_outputs(layer) + + # Write kernel and bias to C + # Keras stores kernel as (rows, cols, out_channels, in_channels) + # C code expects (rows, cols, in_channels, out_channels) + # So we need to transpose the last two dimensions + weights = layer.get_weights() + kernel = weights[0] + kernel = np.transpose(kernel, (0, 1, 3, 2)) + if cfg["use_bias"]: + bias = weights[1] + else: + bias = np.zeros(kernel.shape[3]) + self._write_weights_array2c(kernel, layer.name + "_kernel") + self._write_weights_array2c(bias, layer.name + "_bias") + self.stack_vars += "\n \n" def _write_weights_MaxPooling1D(self, layer, **kwargs): return self._write_weights_Pooling1D(layer, **kwargs) @@ -598,28 +818,36 @@ def _write_weights_AveragePooling1D(self, layer, **kwargs): def _write_weights_Pooling1D(self, layer, skip_outputs=False): cfg = layer.get_config() - pad = cfg['padding'] - stride = cfg['strides'][0] - pool_size = cfg['pool_size'][0] - self.stack_vars += 'size_t ' + layer.name + \ - '_stride = ' + str(stride) + '; \n' - self.stack_vars += 'size_t ' + layer.name + \ - '_pool_size = ' + str(pool_size) + '; \n' + pad = cfg["padding"] + stride = cfg["strides"][0] + pool_size = cfg["pool_size"][0] + 
self.stack_vars += "size_t " + layer.name + "_stride = " + str(stride) + "; \n" + self.stack_vars += ( + "size_t " + layer.name + "_pool_size = " + str(pool_size) + "; \n" + ) if not skip_outputs: self._write_outputs(layer) inshp = layer.input.shape[1:] outshp = layer.output.shape[1:] - if pad == 'same': - pad_along_height = max((outshp[0] - 1) * stride + - pool_size - inshp[0], 0) + if pad == "same": + pad_along_height = max((outshp[0] - 1) * stride + pool_size - inshp[0], 0) pad_top = int(pad_along_height // 2) pad_bottom = int(pad_along_height - pad_top) - self._write_weights_array2c(np.zeros((inshp[0]+pad_top+pad_bottom, inshp[1])), - layer.name + '_padded_input') - self.stack_vars += 'size_t ' + layer.name + '_pad[2] = {' + str(pad_top) + ','\ - + str(pad_bottom) + '}; \n' - self.stack_vars += 'float ' + layer.name + '_fill = -HUGE_VALF; \n' - self.stack_vars += '\n\n' + self._write_weights_array2c( + np.zeros((inshp[0] + pad_top + pad_bottom, inshp[1])), + layer.name + "_padded_input", + ) + self.stack_vars += ( + "size_t " + + layer.name + + "_pad[2] = {" + + str(pad_top) + + "," + + str(pad_bottom) + + "}; \n" + ) + self.stack_vars += "float " + layer.name + "_fill = -HUGE_VALF; \n" + self.stack_vars += "\n\n" def _write_weights_MaxPooling2D(self, layer, **kwargs): return self._write_weights_Pooling2D(layer, **kwargs) @@ -629,36 +857,50 @@ def _write_weights_AveragePooling2D(self, layer, **kwargs): def _write_weights_Pooling2D(self, layer, skip_outputs=False): cfg = layer.get_config() - padding = cfg['padding'] - stride = cfg['strides'] - pool_size = cfg['pool_size'] - self.stack_vars += 'size_t ' + layer.name + \ - '_stride[2] = {' + ','.join([str(i) for i in stride]) + '}; \n' - self.stack_vars += 'size_t ' + layer.name + \ - '_pool_size[2] = {' + ','.join([str(i) - for i in pool_size]) + '}; \n' + padding = cfg["padding"] + stride = cfg["strides"] + pool_size = cfg["pool_size"] + self.stack_vars += ( + "size_t " + + layer.name + + "_stride[2] = {" + + ",".join([str(i) for i in stride]) + + "}; \n" + ) + self.stack_vars += ( + "size_t " + + layer.name + + "_pool_size[2] = {" + + ",".join([str(i) for i in pool_size]) + + "}; \n" + ) if not skip_outputs: self._write_outputs(layer) - if padding == 'same': + if padding == "same": inshp = layer.input.shape[1:] outshp = layer.output.shape[1:] - pad_along_height = max((outshp[0] - 1) * stride[0] + - pool_size[0] - inshp[0], 0) + pad_along_height = max( + (outshp[0] - 1) * stride[0] + pool_size[0] - inshp[0], 0 + ) pad_top = int(pad_along_height // 2) pad_bottom = int(pad_along_height - pad_top) - pad_along_width = max((outshp[1] - 1) * stride[1] + - pool_size[1] - inshp[1], 0) - pad_left = pad_along_width//2 + pad_along_width = max( + (outshp[1] - 1) * stride[1] + pool_size[1] - inshp[1], 0 + ) + pad_left = pad_along_width // 2 pad_right = pad_along_width - pad_left - padshp = (inshp[0]+pad_along_height, - inshp[1]+pad_along_width, inshp[2]) + padshp = (inshp[0] + pad_along_height, inshp[1] + pad_along_width, inshp[2]) pad = [pad_top, pad_bottom, pad_left, pad_right] - self._write_weights_array2c(np.zeros(padshp), layer.name + - '_padded_input') - self.stack_vars += 'size_t ' + layer.name + \ - '_pad[4] = {' + ','.join([str(i) for i in pad]) + '}; \n' - self.stack_vars += 'float ' + layer.name + '_fill = -HUGE_VALF; \n' - self.stack_vars += '\n\n' + self._write_weights_array2c(np.zeros(padshp), layer.name + "_padded_input") + self.stack_vars += ( + "size_t " + + layer.name + + "_pad[4] = {" + + ",".join([str(i) for i in pad]) + + "}; 
\n" + ) + self.stack_vars += "float " + layer.name + "_fill = -HUGE_VALF; \n" + self.stack_vars += "\n\n" def _write_weights_GlobalMaxPooling1D(self, layer, **kwargs): return self._write_weights_GlobalPooling(layer, **kwargs) @@ -681,7 +923,7 @@ def _write_weights_GlobalAveragePooling3D(self, layer, **kwargs): def _write_weights_GlobalPooling(self, layer, skip_outputs=False): if not skip_outputs: self._write_outputs(layer) - self.stack_vars += '\n\n' + self.stack_vars += "\n\n" def _write_weights_Add(self, layer, **kwargs): return self._write_weights_Merge(layer, **kwargs) @@ -707,245 +949,347 @@ def _write_weights_Merge(self, layer, skip_outputs=False): inputs, outputs = get_layer_io_names(layer) for i, (inp, outp) in enumerate(zip(inputs, outputs)): num_tensors = len(inp) - self.stack_vars += 'size_t ' + layer.name + '_num_tensors' + str(i) + \ - ' = ' + str(num_tensors) + '; \n' - self.stack_vars += '\n\n' - - def _write_weights_Concatenate(self, layer, ): + self.stack_vars += ( + "size_t " + + layer.name + + "_num_tensors" + + str(i) + + " = " + + str(num_tensors) + + "; \n" + ) + self.stack_vars += "\n\n" + + def _write_weights_Concatenate( + self, + layer, + ): cfg = layer.get_config() inputs, outputs = get_layer_io_names(layer) for i, (inp, outp) in enumerate(zip(inputs, outputs)): outshp = layer.output.shape[1:] num_tensors = len(inp) - self.stack_vars += 'size_t ' + layer.name + '_num_tensors' + str(i) + \ - ' = ' + str(num_tensors) + '; \n' - ax = cfg['axis'] + self.stack_vars += ( + "size_t " + + layer.name + + "_num_tensors" + + str(i) + + " = " + + str(num_tensors) + + "; \n" + ) + ax = cfg["axis"] if ax < 0: ax += len(layer.input[0].shape) - self.stack_vars += 'size_t ' + layer.name + '_axis = ' +\ - str(ax-1) + '; \n' + self.stack_vars += ( + "size_t " + layer.name + "_axis = " + str(ax - 1) + "; \n" + ) if outp not in self.model_io[1]: - self._write_weights_array2c(np.zeros(outshp), - outp + '_output') - self.stack_vars += '\n\n' + self._write_weights_array2c(np.zeros(outshp), outp + "_output") + self.stack_vars += "\n\n" def _write_weights_ELU(self, layer): cfg = layer.get_config() - alpha = cfg['alpha'] - self.stack_vars += 'float ' + layer.name + \ - '_alpha = ' + str(alpha) + '; \n' - self.stack_vars += '\n\n' + alpha = cfg["alpha"] + self.stack_vars += "float " + layer.name + "_alpha = " + str(alpha) + "; \n" + self.stack_vars += "\n\n" def _write_weights_LeakyReLU(self, layer): cfg = layer.get_config() try: - alpha = cfg['alpha'] + alpha = cfg["alpha"] except KeyError: - alpha = cfg['negative_slope'] - self.stack_vars += 'float ' + layer.name + \ - '_alpha = ' + str(alpha) + '; \n' - self.stack_vars += '\n\n' + alpha = cfg["negative_slope"] + self.stack_vars += "float " + layer.name + "_alpha = " + str(alpha) + "; \n" + self.stack_vars += "\n\n" def _write_weights_ThresholdedReLU(self, layer): cfg = layer.get_config() - theta = cfg['theta'] - self.stack_vars = 'float ' + layer.name + \ - '_theta = ' + str(theta) + '; \n' - self.stack_vars += '\n\n' + theta = cfg["theta"] + self.stack_vars = "float " + layer.name + "_theta = " + str(theta) + "; \n" + self.stack_vars += "\n\n" def _write_weights_ReLU(self, layer): cfg = layer.get_config() - max_value = cfg['max_value'] - negative_slope = cfg['negative_slope'] - threshold = cfg['threshold'] + max_value = cfg["max_value"] + negative_slope = cfg["negative_slope"] + threshold = cfg["threshold"] if max_value is None: - max_value = 'HUGE_VALF' - self.stack_vars += 'float ' + layer.name + \ - '_max_value = ' + str(max_value) + 
'; \n' - self.stack_vars += 'float ' + layer.name + '_negative_slope = ' + \ - str(negative_slope) + '; \n' - self.stack_vars += 'float ' + layer.name + \ - '_threshold = ' + str(threshold) + '; \n' - self.stack_vars += '\n\n' + max_value = "HUGE_VALF" + self.stack_vars += ( + "float " + layer.name + "_max_value = " + str(max_value) + "; \n" + ) + self.stack_vars += ( + "float " + layer.name + "_negative_slope = " + str(negative_slope) + "; \n" + ) + self.stack_vars += ( + "float " + layer.name + "_threshold = " + str(threshold) + "; \n" + ) + self.stack_vars += "\n\n" def _write_weights_PReLU(self, layer): - self._write_weights_array2c( - layer.get_weights()[0], layer.name + '_alpha') - self.stack_vars += '\n\n' + self._write_weights_array2c(layer.get_weights()[0], layer.name + "_alpha") + self.stack_vars += "\n\n" def _write_weights_Reshape(self, layer, skip_outputs=False): cfg = layer.get_config() nm = layer.name if not skip_outputs: self._write_outputs(layer) - newshp = cfg['target_shape'] + newshp = cfg["target_shape"] newndim = len(newshp) - newshp = np.concatenate((newshp, np.ones(maxndim-newndim))) - self.stack_vars += 'size_t ' + nm + \ - '_newndim = ' + str(newndim) + '; \n' - self.stack_vars += 'size_t ' + nm + '_newshp[K2C_MAX_NDIM] = {' + \ - str(np.array2string(newshp.astype(int), - separator=',')[1:-1]) + '}; \n' - self.stack_vars += '\n\n' + newshp = np.concatenate((newshp, np.ones(maxndim - newndim))) + self.stack_vars += "size_t " + nm + "_newndim = " + str(newndim) + "; \n" + self.stack_vars += ( + "size_t " + + nm + + "_newshp[K2C_MAX_NDIM] = {" + + str(np.array2string(newshp.astype(int), separator=",")[1:-1]) + + "}; \n" + ) + self.stack_vars += "\n\n" def _write_weights_Permute(self, layer, skip_outputs=False): cfg = layer.get_config() if not skip_outputs: self._write_outputs(layer) - permute = np.array(cfg['dims']).astype(int) - 1 - self.stack_vars += 'size_t ' + layer.name + '_permute[' + str(permute.size) + '] = {' +\ - str(np.array2string(permute.astype(int), - separator=',')[1:-1]) + '}; \n' - self.stack_vars += '\n\n' + permute = np.array(cfg["dims"]).astype(int) - 1 + self.stack_vars += ( + "size_t " + + layer.name + + "_permute[" + + str(permute.size) + + "] = {" + + str(np.array2string(permute.astype(int), separator=",")[1:-1]) + + "}; \n" + ) + self.stack_vars += "\n\n" def _write_weights_RepeatVector(self, layer, skip_outputs=False): cfg = layer.get_config() if not skip_outputs: self._write_outputs(layer) - n = cfg['n'] - self.stack_vars += 'size_t ' + layer.name + '_n = ' + str(n) + '; \n' - self.stack_vars += '\n\n' + n = cfg["n"] + self.stack_vars += "size_t " + layer.name + "_n = " + str(n) + "; \n" + self.stack_vars += "\n\n" def _write_weights_Dot(self, layer, skip_outputs=False): cfg = layer.get_config() nm = layer.name if not skip_outputs: self._write_outputs(layer) - work_size = np.prod(layer.input[0].shape[1:]) + \ - np.prod(layer.input[1].shape[1:]) - axes = np.array(cfg['axes']) - 1 - self.stack_vars += 'size_t ' + nm + \ - '_axesA[1] = {' + str(axes[0]) + '}; \n' - self.stack_vars += 'size_t ' + nm + \ - '_axesB[1] = {' + str(axes[1]) + '}; \n' - self.stack_vars += 'size_t ' + nm + '_naxes = 1; \n' - self.stack_vars += 'float ' + nm + \ - '_fwork[' + str(work_size) + '] = {0}; \n' - self.stack_vars += 'int ' + nm + '_normalize = ' + \ - str(int(cfg['normalize'])) + '; \n' - self.stack_vars += '\n\n' + work_size = np.prod(layer.input[0].shape[1:]) + np.prod( + layer.input[1].shape[1:] + ) + axes = np.array(cfg["axes"]) - 1 + self.stack_vars += 
"size_t " + nm + "_axesA[1] = {" + str(axes[0]) + "}; \n" + self.stack_vars += "size_t " + nm + "_axesB[1] = {" + str(axes[1]) + "}; \n" + self.stack_vars += "size_t " + nm + "_naxes = 1; \n" + self.stack_vars += "float " + nm + "_fwork[" + str(work_size) + "] = {0}; \n" + self.stack_vars += ( + "int " + nm + "_normalize = " + str(int(cfg["normalize"])) + "; \n" + ) + self.stack_vars += "\n\n" def _write_weights_Embedding(self, layer, skip_outputs=False): nm = layer.name if not skip_outputs: self._write_outputs(layer) kernel = layer.get_weights()[0] - self._write_weights_array2c(kernel, nm+'_kernel') - self.stack_vars += '\n\n' + self._write_weights_array2c(kernel, nm + "_kernel") + self.stack_vars += "\n\n" def _write_weights_UpSampling1D(self, layer, skip_outputs=False): cfg = layer.get_config() nm = layer.name if not skip_outputs: self._write_outputs(layer) - size = cfg['size'] - self.stack_vars += 'size_t ' + nm + '_size = ' + str(size) + '; \n' - self.stack_vars += '\n\n' + size = cfg["size"] + self.stack_vars += "size_t " + nm + "_size = " + str(size) + "; \n" + self.stack_vars += "\n\n" def _write_weights_UpSampling2D(self, layer, skip_outputs=False): cfg = layer.get_config() nm = layer.name if not skip_outputs: self._write_outputs(layer) - size = cfg['size'] - self.stack_vars += 'size_t ' + nm + '_size[2] = {' + str(size[0]) + \ - ',' + str(size[1]) + '}; \n' - self.stack_vars += '\n\n' + size = cfg["size"] + self.stack_vars += ( + "size_t " + + nm + + "_size[2] = {" + + str(size[0]) + + "," + + str(size[1]) + + "}; \n" + ) + self.stack_vars += "\n\n" def _write_weights_UpSampling3D(self, layer, skip_outputs=False): cfg = layer.get_config() nm = layer.name if not skip_outputs: self._write_outputs(layer) - size = cfg['size'] - self.stack_vars += 'size_t ' + nm + '_size[3] = {' + str(size[0]) + \ - ',' + str(size[1]) + ',' + str(size[2]) + '}; \n' - self.stack_vars += '\n\n' + size = cfg["size"] + self.stack_vars += ( + "size_t " + + nm + + "_size[3] = {" + + str(size[0]) + + "," + + str(size[1]) + + "," + + str(size[2]) + + "}; \n" + ) + self.stack_vars += "\n\n" def _write_weights_Cropping1D(self, layer, skip_outputs=False): nm = layer.name if not skip_outputs: self._write_outputs(layer) cfg = layer.get_config() - crop_top = cfg['cropping'][0] - crop_bottom = cfg['cropping'][1] - self.stack_vars += 'size_t ' + nm + '_crop[2] = {' + str(crop_top) + ','\ - + str(crop_bottom) + '}; \n' - self.stack_vars += '\n\n' + crop_top = cfg["cropping"][0] + crop_bottom = cfg["cropping"][1] + self.stack_vars += ( + "size_t " + + nm + + "_crop[2] = {" + + str(crop_top) + + "," + + str(crop_bottom) + + "}; \n" + ) + self.stack_vars += "\n\n" def _write_weights_Cropping2D(self, layer, skip_outputs=False): nm = layer.name if not skip_outputs: self._write_outputs(layer) cfg = layer.get_config() - crop_top = cfg['cropping'][0][0] - crop_bottom = cfg['cropping'][0][1] - crop_left = cfg['cropping'][1][0] - crop_right = cfg['cropping'][1][1] - self.stack_vars += 'size_t ' + nm + '_crop[4] = {' + str(crop_top) + ','\ - + str(crop_bottom) + ',' + str(crop_left) + \ - ',' + str(crop_right) + '}; \n' - self.stack_vars += '\n\n' + crop_top = cfg["cropping"][0][0] + crop_bottom = cfg["cropping"][0][1] + crop_left = cfg["cropping"][1][0] + crop_right = cfg["cropping"][1][1] + self.stack_vars += ( + "size_t " + + nm + + "_crop[4] = {" + + str(crop_top) + + "," + + str(crop_bottom) + + "," + + str(crop_left) + + "," + + str(crop_right) + + "}; \n" + ) + self.stack_vars += "\n\n" def _write_weights_Cropping3D(self, 
layer, skip_outputs=False): nm = layer.name if not skip_outputs: self._write_outputs(layer) cfg = layer.get_config() - crop0 = cfg['cropping'][0][0] - crop1 = cfg['cropping'][0][1] - crop2 = cfg['cropping'][1][0] - crop3 = cfg['cropping'][1][1] - crop4 = cfg['cropping'][2][0] - crop5 = cfg['cropping'][2][1] - self.stack_vars += 'size_t ' + nm + '_crop[6] = {' + str(crop0) + ','\ - + str(crop1) + ',' + str(crop2) + ',' + str(crop3) + \ - ',' + str(crop4) + ',' + str(crop5) + '}; \n' - self.stack_vars += '\n\n' + crop0 = cfg["cropping"][0][0] + crop1 = cfg["cropping"][0][1] + crop2 = cfg["cropping"][1][0] + crop3 = cfg["cropping"][1][1] + crop4 = cfg["cropping"][2][0] + crop5 = cfg["cropping"][2][1] + self.stack_vars += ( + "size_t " + + nm + + "_crop[6] = {" + + str(crop0) + + "," + + str(crop1) + + "," + + str(crop2) + + "," + + str(crop3) + + "," + + str(crop4) + + "," + + str(crop5) + + "}; \n" + ) + self.stack_vars += "\n\n" def _write_weights_ZeroPadding1D(self, layer, skip_outputs=False): nm = layer.name if not skip_outputs: self._write_outputs(layer) cfg = layer.get_config() - pad_top = cfg['padding'][0] - pad_bottom = cfg['padding'][1] - self.stack_vars += 'size_t ' + nm + '_pad[2] = {' + str(pad_top) + ','\ - + str(pad_bottom) + '}; \n' - self.stack_vars += 'float ' + nm + '_fill = 0.0f; \n' - self.stack_vars += '\n\n' + pad_top = cfg["padding"][0] + pad_bottom = cfg["padding"][1] + self.stack_vars += ( + "size_t " + + nm + + "_pad[2] = {" + + str(pad_top) + + "," + + str(pad_bottom) + + "}; \n" + ) + self.stack_vars += "float " + nm + "_fill = 0.0f; \n" + self.stack_vars += "\n\n" def _write_weights_ZeroPadding2D(self, layer, skip_outputs=False): nm = layer.name if not skip_outputs: self._write_outputs(layer) cfg = layer.get_config() - pad_top = cfg['padding'][0][0] - pad_bottom = cfg['padding'][0][1] - pad_left = cfg['padding'][1][0] - pad_right = cfg['padding'][1][1] - self.stack_vars += 'size_t ' + nm + '_pad[4] = {' + str(pad_top) + ','\ - + str(pad_bottom) + ',' + str(pad_left) + \ - ',' + str(pad_right) + '}; \n' - self.stack_vars += 'float ' + nm + '_fill = 0.0f; \n' - self.stack_vars += '\n\n' + pad_top = cfg["padding"][0][0] + pad_bottom = cfg["padding"][0][1] + pad_left = cfg["padding"][1][0] + pad_right = cfg["padding"][1][1] + self.stack_vars += ( + "size_t " + + nm + + "_pad[4] = {" + + str(pad_top) + + "," + + str(pad_bottom) + + "," + + str(pad_left) + + "," + + str(pad_right) + + "}; \n" + ) + self.stack_vars += "float " + nm + "_fill = 0.0f; \n" + self.stack_vars += "\n\n" def _write_weights_ZeroPadding3D(self, layer, skip_outputs=False): nm = layer.name if not skip_outputs: self._write_outputs(layer) cfg = layer.get_config() - pad0 = cfg['padding'][0][0] - pad1 = cfg['padding'][0][1] - pad2 = cfg['padding'][1][0] - pad3 = cfg['padding'][1][1] - pad4 = cfg['padding'][2][0] - pad5 = cfg['padding'][2][1] - self.stack_vars += 'size_t ' + nm + '_pad[6] = {' + str(pad0) + ','\ - + str(pad1) + ',' + str(pad2) + ',' + str(pad3) + \ - ',' + str(pad4) + ',' + str(pad5) + '}; \n' - self.stack_vars += 'float ' + nm + '_fill = 0.0f; \n' - self.stack_vars += '\n\n' + pad0 = cfg["padding"][0][0] + pad1 = cfg["padding"][0][1] + pad2 = cfg["padding"][1][0] + pad3 = cfg["padding"][1][1] + pad4 = cfg["padding"][2][0] + pad5 = cfg["padding"][2][1] + self.stack_vars += ( + "size_t " + + nm + + "_pad[6] = {" + + str(pad0) + + "," + + str(pad1) + + "," + + str(pad2) + + "," + + str(pad3) + + "," + + str(pad4) + + "," + + str(pad5) + + "}; \n" + ) + self.stack_vars += "float " + nm + 
"_fill = 0.0f; \n" + self.stack_vars += "\n\n" def _write_weights_ActivityRegularization(self, layer): # no weights needed @@ -968,8 +1312,7 @@ def _write_weights_Flatten(self, layer): for i, outp in enumerate(outputs): inshp = layer.input.shape[1:] if outp not in self.model_io[1]: - self._write_weights_array2c( - np.zeros(inshp).flatten(), outp + '_output') + self._write_weights_array2c(np.zeros(inshp).flatten(), outp + "_output") def _write_weights_Activation(self, layer): # no weights needed @@ -985,9 +1328,13 @@ def _write_weights_TFOpLambda(self, layer): def _write_weights_TensorFlowOpLayer(self, layer, skip_outputs=False): # Special case when tf.split is used # no weights needed - if 'split' in layer.name: + if "split" in layer.name: if not skip_outputs: self._write_outputs(layer) else: - raise AssertionError('Unsupported TensorFlowOpLayer: ' + layer.name + '\n' - + 'Currently only split operation is supported.') + raise AssertionError( + "Unsupported TensorFlowOpLayer: " + + layer.name + + "\n" + + "Currently only split operation is supported." + ) diff --git a/tests/test_convtranspose_layers.py b/tests/test_convtranspose_layers.py index 197a098..b6da520 100644 --- a/tests/test_convtranspose_layers.py +++ b/tests/test_convtranspose_layers.py @@ -5,13 +5,14 @@ #!/usr/bin/env python3 +import time import unittest + import keras -from keras2c import keras2c_main -import time +import numpy as np from test_core_layers import build_and_run -import numpy as np +from keras2c import keras2c_main __author__ = "Anchal Gupta" __copyright__ = "Copyright 2024, Anchal Gupta" @@ -21,32 +22,69 @@ class TestConvolutionTransposeLayers(unittest.TestCase): - """tests for convolution layers""" + """tests for convolution transpose layers""" def test_Conv1DTranspose1(self): - for tno in range (10): - nh = np.random.randint(2, 50) - nc = np.random.randint(1, 50) - nf = np.random.randint(1, 50) - nk = np.random.randint(1, nh) - strides = np.random.randint(1, max(nk, 2)) - inshp = (nh, nc) + for tno in range(10): + # Variable names match k2c_conv1d_transpose in k2c_conv_transpose_layer.c + n_height = np.random.randint(2, 50) + n_channels = np.random.randint(1, 50) + n_filters = np.random.randint(1, 50) + k_size = np.random.randint(1, n_height) + stride = np.random.randint(1, max(k_size, 2)) + inshp = (n_height, n_channels) + if tno % 2 == 0: + padding = "valid" + else: + padding = "same" + dilation_rate = 1 + activation = None + a = keras.layers.Input(inshp) + b = keras.layers.Conv1DTranspose( + filters=n_filters, + kernel_size=k_size, + strides=stride, + padding=padding, + dilation_rate=dilation_rate, + activation=activation, + use_bias=False, + )(a) + model = keras.models.Model(inputs=a, outputs=b) + name = "test___Conv1DTranspose1" + str(int(time.time())) + keras2c_main.k2c(model, name) + rcode = build_and_run(name) + self.assertEqual(rcode, 0) + + def test_Conv2DTranspose1(self): + for tno in range(10): + # Variable names match k2c_conv2d_transpose in k2c_conv_transpose_layer.c + in_rows = np.random.randint(2, 25) + in_cols = np.random.randint(2, 25) + in_channels = np.random.randint(1, 25) + n_filters = np.random.randint(1, 25) + k_rows = np.random.randint(1, in_rows) + k_cols = np.random.randint(1, in_cols) + stride_h = np.random.randint(1, max(k_rows, 2)) + stride_w = np.random.randint(1, max(k_cols, 2)) + inshp = (in_rows, in_cols, in_channels) if tno % 2 == 0: - padding = 'valid' + padding = "valid" else: - padding = 'same' + padding = "same" dilation_rate = 1 - activation = None # 'relu' + activation 
= None a = keras.layers.Input(inshp) - b = keras.layers.Conv1DTranspose(filters=nf, - kernel_size=nk, - strides=strides, - padding=padding, - dilation_rate=dilation_rate, - activation=activation, - use_bias=False)(a) + b = keras.layers.Conv2DTranspose( + filters=n_filters, + kernel_size=(k_rows, k_cols), + strides=(stride_h, stride_w), + padding=padding, + dilation_rate=dilation_rate, + activation=activation, + use_bias=False, + )(a) model = keras.models.Model(inputs=a, outputs=b) - name = 'test___Conv1DTranspose1' + str(int(time.time())) + name = "test___Conv2DTranspose1" + str(int(time.time())) keras2c_main.k2c(model, name) rcode = build_and_run(name) self.assertEqual(rcode, 0)
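
For context, a minimal Python sketch (not part of the patch; the toy dimensions below are hypothetical, and a standard Keras/NumPy install is assumed) of the kernel reordering and "same"-padding crop computation that `_write_weights_Conv2DTranspose` performs before emitting the C arrays:

```python
import numpy as np
import keras

# Hypothetical toy dimensions, chosen so the two crop values differ.
k_rows, k_cols, stride = 3, 5, 2
inp = keras.layers.Input((8, 8, 2))
layer = keras.layers.Conv2DTranspose(
    filters=4,
    kernel_size=(k_rows, k_cols),
    strides=(stride, stride),
    padding="same",
    use_bias=False,
)
out = layer(inp)  # calling the layer builds it, so get_weights() is populated

# Keras stores the transposed-conv kernel as (rows, cols, out_channels, in_channels);
# the C routine indexes it as (rows, cols, in_channels, out_channels), hence the
# (0, 1, 3, 2) transpose before the array is written out.
kernel = layer.get_weights()[0]
print(kernel.shape)                              # (3, 5, 4, 2)
print(np.transpose(kernel, (0, 1, 3, 2)).shape)  # (3, 5, 2, 4)

# "same" padding on a transposed conv amounts to cropping the full
# (in - 1) * stride + kernel_size output; the crop on each leading edge is
# (kernel_size - stride) // 2, which is what gets written as <layer>_padding.
crop_h = (k_rows - stride) // 2   # 0
crop_w = (k_cols - stride) // 2   # 1
print(crop_h, crop_w, out.shape)  # 0 1 (None, 16, 16, 4) -- "same" gives in * stride
```

This mirrors the comment in the writer about swapping the last two kernel axes, and shows why the Conv1D/Conv2DTranspose writers only need to emit stride and crop amounts rather than a padded input buffer.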