#include <torch/torch.h>

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>
/// Construction parameters for a VGG network.
///
/// Remains an aggregate, so `VGGConfig{layers, use_bn, classes}` keeps
/// working; the default member initializers only make a default-constructed
/// config well-defined instead of leaving the scalar members indeterminate.
struct VGGConfig {
    /// Feature-extractor layout, in order. A positive entry is the number of
    /// output channels of a 3x3 convolution; -1 inserts a 2x2 max-pool.
    std::vector<int> conv_layers;
    /// When true, a BatchNorm2d layer follows every convolution.
    bool use_batch_norm = false;
    /// Width of the final classifier layer.
    int num_classes = 1000;
};
/// VGG-style image classifier: a stack of 3x3 convolutions and 2x2 max-pools
/// ("features") followed by a three-layer fully connected head ("classifier").
///
/// Both stacks are registered as submodules of this module, so their
/// parameters are reachable through `parameters()` and are (de)serialized
/// with the module.
class VGGImpl : public torch::nn::Module {
public:
    /// Builds the network described by `config`.
    ///
    /// @param config Layer layout, batch-norm switch and number of classes.
    /// @throws c10::Error if `config.conv_layers` contains an entry that is
    ///         neither -1 (max-pool) nor a positive channel count.
    explicit VGGImpl(const VGGConfig& config) {
        // The member holders start out empty (nullptr), so the feature stack
        // is assembled in a freshly constructed Sequential and registered
        // once it is complete.
        torch::nn::Sequential feature_layers;
        int64_t in_channels = 3;  // the first convolution is built for 3-channel input
        int layer_idx = 0;        // 1-based counter used to name conv/bn layers

        for (const int out_channels : config.conv_layers) {
            if (out_channels == -1) {
                feature_layers->push_back(
                    torch::nn::MaxPool2d(torch::nn::MaxPool2dOptions(2).stride(2)));
                continue;
            }
            TORCH_CHECK(out_channels > 0,
                        "VGGConfig::conv_layers entries must be -1 (max-pool) or a "
                        "positive channel count, got ", out_channels);

            ++layer_idx;
            // Conv/BN layers are named children of the Sequential; registering
            // them on this module as well would list their parameters twice.
            feature_layers->push_back(
                "conv_" + std::to_string(layer_idx),
                torch::nn::Conv2d(
                    torch::nn::Conv2dOptions(in_channels, out_channels, 3).padding(1)));
            if (config.use_batch_norm) {
                feature_layers->push_back(
                    "bn_" + std::to_string(layer_idx),
                    torch::nn::BatchNorm2d(out_channels));
            }
            feature_layers->push_back(torch::nn::ReLU());
            in_channels = out_channels;
        }
        features = register_module("features", feature_layers);

        // The head flattens a 7x7 feature map whose depth is the width of the
        // last convolution (512 for the layouts produced by create_vgg).
        // NOTE(review): 7x7 presumably corresponds to 224x224 inputs after
        // five 2x2 poolings; other input sizes would need a different value.
        const int64_t flattened_features = in_channels * 7 * 7;
        classifier = register_module(
            "classifier",
            torch::nn::Sequential(
                torch::nn::Linear(flattened_features, 4096),
                torch::nn::ReLU(),
                torch::nn::Dropout(0.5),
                torch::nn::Linear(4096, 4096),
                torch::nn::ReLU(),
                torch::nn::Dropout(0.5),
                torch::nn::Linear(4096, config.num_classes)));
    }

    /// Runs the feature stack, flattens everything after dimension 0 and
    /// applies the classifier.
    ///
    /// @param x Input batch.
    /// @return The output of the final Linear layer (no softmax is applied).
    torch::Tensor forward(torch::Tensor x) {
        x = features->forward(x);
        x = torch::flatten(x, 1);
        x = classifier->forward(x);
        return x;
    }

private:
    torch::nn::Sequential features{nullptr};
    torch::nn::Sequential classifier{nullptr};
};
TORCH_MODULE(VGG);
/// Creates a VGG network with one of the standard layer layouts.
///
/// Declared `inline` because the definition lives in a header; without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
///
/// @param version     Layout to build: 11, 13, 16 or 19.
/// @param num_classes Width of the final classifier layer.
/// @param batch_norm  Whether to add BatchNorm2d after every convolution.
/// @return The constructed module holder.
/// @throws std::runtime_error if `version` is not one of the supported values.
inline VGG create_vgg(int version = 16, int num_classes = 1000, bool batch_norm = false) {
    // Positive entries are conv output channels; -1 marks a 2x2 max-pool.
    std::vector<int> layers;
    switch (version) {
        case 11:
            layers = {64, -1, 128, -1, 256, 256, -1, 512, 512, -1, 512, 512, -1};
            break;
        case 13:
            layers = {64, 64, -1, 128, 128, -1, 256, 256, -1,
                      512, 512, -1, 512, 512, -1};
            break;
        case 16:
            layers = {64, 64, -1, 128, 128, -1, 256, 256, 256, -1,
                      512, 512, 512, -1, 512, 512, 512, -1};
            break;
        case 19:
            layers = {64, 64, -1, 128, 128, -1, 256, 256, 256, 256, -1,
                      512, 512, 512, 512, -1, 512, 512, 512, 512, -1};
            break;
        default:
            throw std::runtime_error("Unsupported VGG version");
    }
    return VGG(VGGConfig{layers, batch_norm, num_classes});
}
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <vector>
#include <string>
#include <filesystem>
/// Image-folder dataset: `root_dir/<class_name>/*.jpg|*.png`, where the label
/// of a sample is the index of its class in `class_names`.
///
/// Each sample is returned as a float32 CHW tensor scaled to [0, 1] and then
/// normalized per channel, together with a scalar int64 label tensor.
class CustomDataset : public torch::data::Dataset<CustomDataset> {
public:
    /// Scans one sub-directory per class and records every `.jpg` / `.png`
    /// file found directly inside it.
    ///
    /// @param root_dir    Directory containing one sub-directory per class.
    /// @param class_names Sub-directory names; the position in this list is
    ///                    the numeric label.
    /// @param image_size  Side length of the square crop returned by `get`.
    /// @param is_train    Selects random augmentation (true) or the
    ///                    deterministic resize + center crop (false).
    /// @throws c10::Error if `image_size` is not positive.
    /// @throws std::filesystem::filesystem_error if a class directory cannot
    ///         be iterated.
    explicit CustomDataset(const std::string& root_dir,
                           const std::vector<std::string>& class_names,
                           int image_size = 224,
                           bool is_train = true)
        : class_map_(class_names), image_size_(image_size), is_train_(is_train) {
        TORCH_CHECK(image_size > 0, "image_size must be positive, got ", image_size);

        int64_t label = 0;
        for (const auto& class_name : class_names) {
            const std::string class_dir = root_dir + "/" + class_name;

            std::vector<std::string> class_paths;
            for (const auto& entry : std::filesystem::directory_iterator(class_dir)) {
                const auto extension = entry.path().extension();
                if (extension == ".jpg" || extension == ".png") {
                    class_paths.push_back(entry.path().string());
                }
            }
            // directory_iterator visits entries in an unspecified order; sort
            // so that sample indices are reproducible between runs.
            std::sort(class_paths.begin(), class_paths.end());

            image_paths_.insert(image_paths_.end(), class_paths.begin(), class_paths.end());
            labels_.insert(labels_.end(), class_paths.size(), label);
            ++label;
        }
    }

    /// Loads, augments/crops and normalizes the sample at `index`.
    ///
    /// @return `{image, label}`: a contiguous float32 tensor of shape
    ///         `{3, image_size, image_size}` and a scalar int64 tensor.
    /// @throws std::out_of_range if `index` is past the end of the dataset.
    /// @throws std::runtime_error if the image file cannot be decoded.
    torch::data::Example<> get(size_t index) override {
        const std::string& path = image_paths_.at(index);
        cv::Mat image = cv::imread(path);
        if (image.empty()) {
            throw std::runtime_error("Failed to load image: " + path);
        }
        image = preprocess_image(image);

        // from_blob only borrows the cv::Mat buffer; the dtype conversion in
        // to(kFloat32) copies the pixels, so the tensor does not dangle once
        // `image` is destroyed.
        auto tensor = torch::from_blob(
                          image.data, {image.rows, image.cols, 3}, torch::kByte)
                          .permute({2, 0, 1})
                          .to(torch::kFloat32)
                          .div_(255);

        // Per-channel normalization, applied to the channels in the order
        // cv::imread delivers them (BGR in its default mode).
        // NOTE(review): these constants look like the usual ImageNet RGB
        // statistics, so the channel order may be unintended. It is left
        // as-is because the inference program applies the same constants in
        // the same order; change both together or not at all.
        tensor[0].sub_(0.485).div_(0.229);
        tensor[1].sub_(0.456).div_(0.224);
        tensor[2].sub_(0.406).div_(0.225);

        const int64_t label = labels_[index];
        return {tensor.contiguous(), torch::tensor(label)};
    }

    /// Number of images discovered by the constructor.
    torch::optional<size_t> size() const override {
        return image_paths_.size();
    }

    /// Maps a numeric label back to its class name.
    /// @throws std::out_of_range if `label` is not a valid class index.
    std::string get_class_name(int label) const {
        return class_map_.at(label);
    }

private:
    /// Uniform random integer in `[0, max_offset]`; returns 0 when there is
    /// no room to move the crop window.
    static int random_offset(int max_offset) {
        if (max_offset <= 0) {
            return 0;
        }
        // randint's upper bound is exclusive.
        return static_cast<int>(
            torch::randint(/*low=*/0, /*high=*/max_offset + 1, {1}, torch::kLong)
                .item<int64_t>());
    }

    /// Produces an `image_size_` x `image_size_` crop of `image`.
    ///
    /// Training: random horizontal flip, random rescale of each side to
    /// 80-100% of its original length (never below the crop size), then a
    /// random crop. Evaluation: resize to a square of side
    /// `image_size_ * 256 / 224` and take the center crop.
    cv::Mat preprocess_image(cv::Mat image) {
        if (is_train_) {
            if (torch::rand(1).item<float>() > 0.5) {
                cv::flip(image, image, 1);
            }

            const int h = image.rows;
            const int w = image.cols;
            int new_h = static_cast<int>(h * 0.8 + torch::rand(1).item<float>() * h * 0.2);
            int new_w = static_cast<int>(w * 0.8 + torch::rand(1).item<float>() * w * 0.2);
            // Never shrink below the crop size; otherwise the ROI below would
            // fall outside the image.
            new_h = std::max(new_h, image_size_);
            new_w = std::max(new_w, image_size_);
            cv::resize(image, image, cv::Size(new_w, new_h));

            const int y = random_offset(new_h - image_size_);
            const int x = random_offset(new_w - image_size_);
            const cv::Rect roi(x, y, image_size_, image_size_);
            image = image(roi).clone();
        } else {
            const int resized_side = image_size_ * 256 / 224;
            cv::resize(image, image, cv::Size(resized_side, resized_side));

            const int center_y = image.rows / 2;
            const int center_x = image.cols / 2;
            const cv::Rect roi(center_x - image_size_ / 2, center_y - image_size_ / 2,
                               image_size_, image_size_);
            image = image(roi).clone();
        }
        return image;
    }

    std::vector<std::string> image_paths_;  // one entry per sample
    std::vector<int64_t> labels_;           // parallel to image_paths_
    std::vector<std::string> class_map_;    // label index -> class name
    int image_size_;
    bool is_train_;
};
#include "vgg.h"
#include "custom_dataset.h"
#include <torch/torch.h>
#include <iostream>
#include <memory>
void train_custom(const std::string& train_dir,
const std::string& val_dir,
const std::vector<std::string>& class_names,
int num_epochs = 50,
int batch_size = 32) {
auto model = create_vgg(16, class_names.size());
model->to(torch::kCUDA);
auto train_dataset = CustomDataset(train_dir, class_names, 224, true)
.map(torch::data::transforms::Stack<>());
auto val_dataset = CustomDataset(val_dir, class_names, 224, false)
.map(torch::data::transforms::Stack<>());
auto train_loader = torch::data::make_data_loader(
std::move(train_dataset),
torch::data::DataLoaderOptions()
.batch_size(batch_size)
.workers(4)
.shuffle(true));
auto val_loader = torch::data::make_data_loader(
std::move(val_dataset),
torch::data::DataLoaderOptions()
.batch_size(batch_size)
.workers(4)
.shuffle(false));
torch::optim::Adam optimizer(
model->parameters(),
torch::optim::AdamOptions(1e-4).weight_decay(1e-4));
auto criterion = torch::nn::CrossEntropyLoss();
for (int epoch = 1; epoch <= num_epochs; ++epoch) {
model->train();
float running_loss = 0.0;
int correct = 0;
int total = 0;
for (auto& batch : *train_loader) {
auto data = batch.data.to(torch::kCUDA);
auto targets = batch.target.to(torch::kCUDA);
optimizer.zero_grad();
auto outputs = model->forward(data);
auto loss = criterion(outputs, targets);
loss.backward();
optimizer.step();
running_loss += loss.item<float>();
auto predicted = torch::argmax(outputs, 1);
total += targets.size(0);
correct += (predicted == targets).sum().item<int>();
}
float train_acc = 100.0 * correct / total;
float train_loss = running_loss / total;
model->eval();
running_loss = 0.0;
correct = 0;
total = 0;
for (auto& batch : *val_loader) {
auto data = batch.data.to(torch::kCUDA);
auto targets = batch.target.to(torch::kCUDA);
auto outputs = model->forward(data);
auto loss = criterion(outputs, targets);
running_loss += loss.item<float>();
auto predicted = torch::argmax(outputs, 1);
total += targets.size(0);
correct += (predicted == targets).sum().item<int>();
}
float val_acc = 100.0 * correct / total;
float val_loss = running_loss / total;
std::cout << "Epoch [" << epoch << "/" << num_epochs << "]\n"
<< "Train Loss: " << train_loss << " Acc: " << train_acc << "%\n"
<< "Val Loss: " << val_loss << " Acc: " << val_acc << "%\n\n";
}
torch::save(model, "custom_vgg16_model.pt");
}
/// Entry point of the training program: trains on `data/train`, validates on
/// `data/val` for the classes cat/dog/bird, 50 epochs with batch size 32.
///
/// @return 0 on success, 1 if any std::exception escaped training (its
///         message is written to stderr).
int main() {
    try {
        const std::vector<std::string> class_names{"cat", "dog", "bird"};
        train_custom("data/train", "data/val", class_names, 50, 32);
        return 0;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
    }
    return 1;
}
#include "vgg.h"
#include "custom_dataset.h"
#include <torch/torch.h>
#include <opencv2/opencv.hpp>
#include <iostream>
int main() {
try {
std::vector<std::string> class_names = {"cat", "dog", "bird"};
auto model = create_vgg(16, class_names.size());
torch::load(model, "custom_vgg16_model.pt");
model->eval();
model->to(torch::kCUDA);
cv::Mat image = cv::imread("test_image.jpg");
if (image.empty()) {
throw std::runtime_error("Failed to load image");
}
int image_size = 224;
cv::resize(image, image, cv::Size(image_size * 256 / 224, image_size * 256 / 224));
int center_y = image.rows / 2;
int center_x = image.cols / 2;
cv::Rect roi(center_x - image_size/2, center_y - image_size/2, image_size, image_size);
image = image(roi).clone();
auto tensor = torch::from_blob(
image.data, {image.rows, image.cols, 3}, torch::kByte);
tensor = tensor.permute({2, 0, 1}).to(torch::kFloat32).div_(255);
tensor[0] = tensor[0].sub_(0.485).div_(0.229);
tensor[1] = tensor[1].sub_(0.456).div_(0.224);
tensor[2] = tensor[2].sub_(0.406).div_(0.225);
auto input_tensor = tensor.unsqueeze(0).to(torch::kCUDA);
auto output = model->forward(input_tensor);
auto probs = torch::softmax(output, 1);
auto predicted_idx = torch::argmax(probs).item<int>();
std::cout << "Predicted class: " << class_names[predicted_idx]
<< " (" << predicted_idx << ")\n";
std::cout << "Confidence: " << probs[0][predicted_idx].item<float>() * 100 << "%\n";
for (size_t i = 0; i < class_names.size(); ++i) {
std::cout << class_names[i] << ": "
<< probs[0][i].item<float>() * 100 << "%\n";
}
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}