검색결과 리스트
image processing cuda에 해당되는 글 1건
- 2015.07.28 Image Processing with CUDA
글
Image Processing with CUDA
프로그래밍/CUDA
2015. 7. 28. 02:39
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | // main.cpp #include <cv.hpp> #include "kernel.h" using namespace cv; int main() { // Open a webcamera VideoCapture camera(0); Mat frame; if (!camera.isOpened()) return -1; camera >> frame; // create CPU/GPU shared images - one for the initial and one for the result Mat sGray(frame.size(),CV_8U,createImageBuffer(frame.size().width * frame.size().height)); Mat dGray(frame.size(),CV_8U,createImageBuffer(frame.size().width * frame.size().height)); Mat eGray(frame.size(),CV_8U,createImageBuffer(frame.size().width * frame.size().height)); cvtColor(frame, dGray, CV_BGR2GRAY); cvtColor(frame, eGray, CV_BGR2GRAY); // Create the capture windows namedWindow("Source"); namedWindow("Greyscale"); namedWindow("Blurred"); namedWindow("Sobel"); char c; // Loop while capturing images while (1) { // Capture the image and store a gray conversion for the gpu camera >> frame; cv::cvtColor(frame, sGray, CV_BGR2GRAY); boxfilter(frame.size().width, frame.size().height, sGray.data, dGray.data, 3, 3); //boxfilterCPU(frame.size().width, frame.size().height, sGray.data, dGray.data, 3, 3); sobelfilter(frame.size().width, frame.size().height, dGray.data, eGray.data); // Show the results cv::imshow("Source", frame); cv::imshow("Greyscale", sGray); cv::imshow("Blurred", dGray); cv::imshow("Sobel", eGray); c = cv::waitKey(10); if (c == 27) break; } // Exit destroyImageBuffer(sGray.data); destroyImageBuffer(dGray.data); destroyImageBuffer(eGray.data); return 0; } | cs |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 | // kernel.h #ifndef _KERNEL_H_ #define _KERNEL_H_ #include <iostream> void boxfilter(int iw, int ih, unsigned char *source, unsigned char *dest, int bw, int bh); void boxfilterCPU(int iw, int ih, unsigned char *src, unsigned char *dst, int bw, int bh); void sobelfilter(int iw, int ih, unsigned char *source, unsigned char *dest); unsigned char* createImageBuffer(unsigned int bytes); void destroyImageBuffer(unsigned char* bytes); #endif | cs |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | #include "cuda_runtime.h" #include "device_launch_parameters.h" #include <stdio.h> #include "kernel.h" void boxfilterCPU(int iw, int ih, unsigned char *src, unsigned char *dst, int bw, int bh) { for (int m = 0; m < ih; m++) { for (int n = 0; n < iw; n++) { int count = 0; float sum = 0.0; for (int j = -(bh / 2); j <= (bh / 2); j++) { for (int i = -(bw / 2); i <= (bw / 2); i++) { // Verify that this offset is within the image boundaries if ((n + i) < iw && (n + i) >= 0 && (m + j) < ih && (m + j) >= 0) { sum += (float)src[((m + j) * iw) + (n + i)]; count++; } } } // Average the sum sum /= (float)count; dst[(m * iw) + n] = (unsigned char)sum; } } } __global__ void boxfilter_kernel(int iw, int ih, unsigned char *source, unsigned char *dest, int bw, int bh) { // Calculate our pixel's location int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; // Variables to store the sum int count = 0; float sum = 0.0; // Do the blur operation by summing the surround pixels for (int j = -(bh / 2); j <= (bh / 2); j++) { for (int i = -(bw / 2); i <= (bw / 2); i++) { // Verify that this offset is within the image boundaries if ((x + i) < iw && (x + i) >= 0 && (y + j) < ih && (y + j) >= 0) { sum += (float)source[((y + j) * iw) + (x + i)]; count++; } } } // Average the sum sum /= (float)count; dest[(y * iw) + x] = (unsigned char)sum; } __global__ void sobelfilter_kernel(int iw, int ih, unsigned char *source, unsigned char *dest) { // Calculate our pixel's location int x = (blockIdx.x * blockDim.x) + threadIdx.x; int y = (blockIdx.y * blockDim.y) + threadIdx.y; // Operate only if we are in the correct boundaries if (x > 0 && x < iw - 1 && y > 0 && y < ih - 1) { int gx = -source[iw*(y - 1) + (x - 1)] + source[iw*(y - 1) + (x + 1)] + -2 * source[iw*(y)+(x - 1)] + 2 * source[iw*(y)+(x + 1)] + -source[iw*(y + 1) + (x - 1)] + source[iw*(y + 1) + (x + 1)]; int gy = -source[iw*(y - 1) + (x - 1)] - 2 * source[iw*(y - 1) + (x)] - source[iw*(y - 1) + (x + 1)] + source[iw*(y + 1) + (x - 1)] + 2 * source[iw*(y + 1) + (x)] + source[iw*(y + 1) + (x + 1)]; dest[iw*y + x] = (int)sqrt((float)(gx)*(float)(gx)+(float)(gy)*(float)(gy)); } } void boxfilter(int iw, int ih, unsigned char *source, unsigned char *dest, int bw, int bh) { // allocate memory for the bitmap in GPU memory unsigned char *dev_source, *dev_dest; cudaHostGetDevicePointer(&dev_source, source, 0); cudaHostGetDevicePointer(&dev_dest, dest, 0); //cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags() // Run the boxfilter kernel dim3 blocks(iw / 16, ih / 16); dim3 threads(16, 16); // Execute the kernel boxfilter_kernel << <blocks, threads >> >(iw, ih, dev_source, dev_dest, bw, bh); cudaThreadSynchronize(); } void sobelfilter(int iw, int ih, unsigned char *source, unsigned char *dest) { // allocate memory for the bitmap in GPU memory unsigned char *dev_source, *dev_dest; cudaHostGetDevicePointer(&dev_source, source, 0); cudaHostGetDevicePointer(&dev_dest, dest, 0); // Run the boxfilter kernel dim3 blocks(iw / 16, ih / 16); dim3 threads(16, 16); // Execute the kernel sobelfilter_kernel << <blocks, threads >> >(iw, ih, dev_source, dev_dest); cudaThreadSynchronize(); } unsigned char* createImageBuffer(unsigned int bytes) { unsigned char *ptr = NULL; cudaSetDeviceFlags(cudaDeviceMapHost); cudaHostAlloc(&ptr, bytes, cudaHostAllocMapped); return ptr; } void destroyImageBuffer(unsigned char* bytes) { cudaFreeHost(bytes); } | cs |
'프로그래밍 > CUDA' 카테고리의 다른 글
CUDA 스레드 구조 (0) | 2014.12.06 |
---|---|
CUDA 기본 문법 정리 (0) | 2014.12.06 |