言語処理系およびライブラリごとの画像処理時間を比較

言語処理系（JavaScript, Python, C++, Java, C#.NET）およびライブラリ（OpenCV, Pillow）ごとに、簡単な画像処理を行う時間を計測した。

処理内容

カラー画像に対して、グレースケール変換と二値化を行う。

元画像

結果画像

グレースケール変換はOpenCV内部処理に合わせた以下のBT.601式とした。
```
 0.299 * R + 0.587 * G + 0.114 * B 
```
二値化閾値は128。

実装方法

処理系ごとにありがちな実装を再現した。完全に同一の処理ではなく、入出力データ形式や細かな処理方式（メモリ確保有無、丸め処理、閾値と等しい画素値の扱いなど）が異なる。
時間計測範囲の実装を以下に示すが、この前後で画像入出力を行い、正常にグレースケール変換と二値化が行われることを確認した。

JavaScript / Edge,Chrome （入出力データ形式: ImageData）

 // Binarizes the pixel buffer of imgData in place: for every pixel the three color
 // channels are replaced by 0 or 255 depending on the rounded BT.601 luma.
 const binTh = 128;
 const data = imgData.data
 // Step of 4 per pixel: offsets i, i+1, i+2 are read and written, offset i+3 is never touched.
 for (let i=0; i<data.length; i+=4){
  	// Weighted sum of the three channels, rounded to the nearest integer.
  	const gry = Math.round(0.299 * data[i] + 0.587 * data[i+1] + 0.114 * data[i+2]);
  	// The comparison is `<`, so a luma of exactly binTh becomes 255.
  	data[i] = data[i+1] = data[i+2] = (gry < binTh) ? 0 : 255;
 }

Pillow / Python （入出力データ形式: Image）

 # Grayscale conversion and thresholding using Pillow image methods only.
 bin_th = 128
 # Mode "L" is Pillow's 8-bit grayscale; for color input Pillow documents the
 # ITU-R 601-2 luma transform (same 0.299 / 0.587 / 0.114 weights).
 gray = img.convert("L")
 # The comparison is strict `>`, so a value of exactly bin_th becomes 0.
 # Per the Pillow docs, point() calls the function once per possible pixel value
 # and applies the resulting lookup table, rather than calling it per pixel.
 binary = gray.point(lambda x: 255 if x > bin_th else 0)

NumPy / Python （入出力データ形式: NumPy配列）

 # Grayscale conversion and thresholding written as whole-array NumPy expressions.
 bin_th = 128
 # Index 2 receives the R weight (0.299) and index 0 the B weight (0.114), i.e. the
 # last axis is treated as B, G, R — presumably an array from cv2.imread; confirm.
 # np.rint rounds the weighted sum to the nearest integer and returns a float array.
 gray = np.rint(0.299 * img[:, :, 2] + 0.587 * img[:, :, 1] + 0.114 * img[:, :, 0])
 # Strict `>`: a value of exactly bin_th becomes 0. np.where allocates a new array.
 # NOTE(review): with two Python int choices the result uses NumPy's default integer
 # dtype, not uint8 — confirm this is what the code saving the image expects.
 binary = np.where((gray > bin_th) , 255, 0)

C++ （入出力データ形式: 4byte整数配列）

 // Channel helpers for a pixel packed into one integer:
 // red in bits 0-7, green in bits 8-15, blue in bits 16-23.
 // Rgb2Colorref packs three 8-bit channels back into that layout; bits 24 and up are zero.
 #define Colorref2Red(RGB) ((unsigned char)(RGB)) 
 #define Colorref2Green(RGB) ((unsigned char)(((unsigned long) (RGB)) >> 8)) 
 #define Colorref2Blue(RGB) ((unsigned char)((RGB) >> 16)) 
 #define Rgb2Colorref(r, g ,b)  ((unsigned long)(((unsigned char)(r) | \
     (((unsigned short)(unsigned char)(g)) << 8)) | \
     (((unsigned long)(unsigned char)(b)) << 16)))
 // Overwrite each of the width * height pixels, in place, with its binarized gray value.
 const int binTh = 128;
 int cnt = width * height;
 // NOTE(review): one pixel is read as one unsigned long. That type is 4 bytes on
 // Windows (LLP64) but 8 bytes on LP64 platforms, where this walk would not match
 // a 4-byte-per-pixel buffer — confirm the target platform.
 unsigned long* ptr = img;
 while (cnt--) {
	// Rounded BT.601 luma; the comparison is `<`, so exactly binTh becomes 255.
	const int bin = (std::round(0.299 * Colorref2Red(*ptr) + 0.587 * Colorref2Green(*ptr) + 0.114 * Colorref2Blue(*ptr)) < binTh) ? 0 : 255;
	// Writes the same value to all three channels; whatever was in the top byte is cleared.
	*ptr++ = Rgb2Colorref(bin, bin, bin);
 }

Java （入出力データ形式: BufferedImage）

 // Per-pixel binarization going through BufferedImage.getRGB / setRGB for every pixel.
 int width = img.getWidth();
 int height = img.getHeight();
 int binTh = 128;
 for (int y = 0; y < height; y++) {
	for (int x = 0; x < width; x++) {
		// getRGB returns the pixel packed as ARGB: R in bits 16-23, G in 8-15, B in 0-7.
		int col = img.getRGB( x, y );
		// The (int) cast truncates the weighted sum; it is not rounded to nearest.
		int gray =  (int)(0.299 * (double)((col >> 16)&0xff) + 0.587 * (double)((col >> 8)&0xff) + 0.114 * (double)(col&0xff));
		// The comparison is `<`, so a gray value of exactly binTh becomes 255.
		int binary = (gray < binTh) ? 0 : 255;
		// NOTE(review): the packed value has a zero top (alpha) byte. setRGB interprets
		// its argument as ARGB, so on an image type with an alpha channel the pixel
		// would become fully transparent — confirm the image type has no alpha.
		img.setRGB( x, y, binary << 16 | binary << 8 | binary );
	}
 }

C#.NET （入出力データ形式: Bitmap）

 // Per-pixel binarization going through Bitmap.GetPixel / SetPixel for every pixel.
 int binTh = 128;
 for (int y = 0; y < bitmap.Height; y++)
 {
	for (int x = 0; x < bitmap.Width; x++)
	{
		Color col = bitmap.GetPixel(x, y);
		// The (int) cast truncates the weighted sum; it is not rounded to nearest.
		int gray = (int)((col.R * 0.299) + (col.G * 0.587) + (col.B * 0.114));
		// The comparison is `<`, so a gray value of exactly binTh becomes 255.
		int bin = (gray < binTh) ? 0 : 255;
		// Three-argument FromArgb: red, green, blue; alpha is implicitly 255.
		bitmap.SetPixel(x, y, Color.FromArgb(bin, bin, bin));
	}
 }

OpenCV.js / JavaScript / Edge,Chrome （入出力データ形式: Mat）

 // Grayscale conversion followed by a fixed threshold, both performed by OpenCV.js.
 const binTh = 128;
 // RGBA input to single-channel gray; the final 0 is dstCn (channel count derived automatically).
 cv.cvtColor(src, dst, cv.COLOR_RGBA2GRAY, 0);
 // THRESH_BINARY: values greater than binTh become 255, all others (including binTh itself) become 0.
 cv.threshold(dst, binary, binTh, 255, cv.THRESH_BINARY);

OpenCV / Python （入出力データ形式: Mat）

 # Grayscale conversion followed by a fixed threshold via OpenCV's Python bindings.
 bin_th = 128
 # NOTE(review): this uses COLOR_RGB2GRAY, whereas the OpenCV C++ / Java / C# snippets
 # use BGR2GRAY. If img comes from cv2.imread (B, G, R order) the R and B weights are
 # swapped here — confirm how img is loaded.
 gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
 # threshold returns (threshold value used, result image).
 # THRESH_BINARY: values greater than bin_th become 255, all others become 0.
 ret, binary  = cv2.threshold(gray, bin_th, 255, cv2.THRESH_BINARY)

OpenCV / C++ （入出力データ形式: Mat）

 // Grayscale conversion followed by a fixed threshold using OpenCV's C++ API.
 const int binTh = 128;
 // img is converted as B, G, R channel order into the single-channel gray.
 cvtColor(img, gray, COLOR_BGR2GRAY);
 // THRESH_BINARY: values greater than binTh become 255, all others (including binTh itself) become 0.
 threshold(gray, binary, binTh, 255, THRESH_BINARY);

OpenCV / Java （入出力データ形式: Mat）

 // Grayscale conversion followed by a fixed threshold using OpenCV's Java bindings.
 // minTh is the binarization threshold (named binTh in the other snippets).
 double minTh = 128;
 // img is converted as B, G, R channel order into the single-channel gray.
 Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGR2GRAY);
 // THRESH_BINARY: values greater than minTh become 255, all others (including minTh itself) become 0.
 Imgproc.threshold(gray, binary, minTh, 255, Imgproc.THRESH_BINARY);

OpenCVSharp / C#.NET （入出力データ形式: Mat）

 // Grayscale conversion followed by a fixed threshold using OpenCvSharp's Mat methods.
 int binTh = 128;
 // Each call returns its result as a new Mat (gray, then binary).
 Mat gray = img.CvtColor(ColorConversionCodes.BGR2GRAY);
 // Binary threshold type: values greater than binTh become 255, all others become 0.
 Mat binary = gray.Threshold(binTh, 255, ThresholdTypes.Binary);

処理時間計測

実験環境はWindows / Intel Core i7-1260P 2.1GHz
実験実施日は2024年10月

実験画像

4094x3780画素

7230x5428画素

14364x11356画素

※ 4094x3780画素の画像は国土地理院の空中写真

処理時間計測値（単位：ms）

処理時間グラフ表示

実験画像（選択した画像の処理時間をグラフ表示する） 4094x3780画素 7230x5428画素 14364x11356画素

※ C#.NETの処理時間はグラフ描画エリアに収まっていない

感触

入出力データ形式や細かな処理方式が異なるため、純粋に処理系の優劣を比較できるデータになっていない点に留意する必要があるが、大まかな傾向は掴める。
JavaScriptがEdgeとChromeでほぼ同等の処理時間になるのは、ともにChromiumベースでJavaScriptエンジンが同じGoogle V8であるためと考えられる。
OpenCVを使用する場合は、ブラウザ内で利用するOpenCV.jsとそれ以外で処理時間が異なるが、OpenCV未使用の場合に比べると高速である。
OpenCV.js以外のOpenCVを使用する場合は、処理系に関係なく概ね同等の処理時間になる。 C++とJavaからOpenCVを使用するのが最も高速だが大きな差異ではないため、対象プラットフォームで使いやすいプログラミング環境を利用するのが良い。
OpenCV.jsはOpenCVのWebAssembly版サブセットでリソースの制約があるブラウザ内で動作するため性能が落ちると考えられる。本実験では性能比較のため、画面に収まらない巨大な画像をそのままの解像度で処理したが、ブラウザ内で行う処理としては不自然とも言える。ちなみに、OpenCV公式サイトのOpenCV.js記事に掲載されているサンプルは画像を画面表示サイズに縮小してから処理する実装になっているため高速に動作する。
言語処理系の核心が昔も今も「最適化」であることに変わりないが、プログラムの「生産性」の重要性が高まり、ライブラリを含む「プログラミング環境」が重視されるようになっている。先人の考えたアルゴリズムを組み合わせて利用するタイプの一般的な処理を実現する場合は、型チェックやメモリ管理は言語処理系に任せ、高速なライブラリを利用して必要機能を実装するのが得策である。完全に新しいアルゴリズムをゼロから開発する場合は既存のライブラリを利用できないため自分で作るしかないが、 C/C++で実装すればほとんどの言語からFFIあるいはBindingと呼ばれる仕組みで直接呼び出して利用できるため、高性能と生産性を両立させた開発が可能になる。