summaryrefslogtreecommitdiff
path: root/app/api/ocr/utils/imageRotation.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/api/ocr/utils/imageRotation.ts')
-rw-r--r--app/api/ocr/utils/imageRotation.ts583
1 files changed, 383 insertions, 200 deletions
diff --git a/app/api/ocr/utils/imageRotation.ts b/app/api/ocr/utils/imageRotation.ts
index fe9cf840..6d59dace 100644
--- a/app/api/ocr/utils/imageRotation.ts
+++ b/app/api/ocr/utils/imageRotation.ts
@@ -1,244 +1,427 @@
+// ============================================================================
// app/api/ocr/utils/imageRotation.ts
-// Sharp을 사용한 서버 사이드 이미지 회전
+// Image rotation utilities, extended with PDF page processing
+// ============================================================================
import sharp from 'sharp';
+import { promises as fs } from 'fs';
+import path from 'path';
+import { randomUUID } from 'crypto';
+import { execFile } from 'child_process';
+import { promisify } from 'util';
+import { tmpdir } from 'os';
+
+const exec = promisify(execFile);
/**
- * 서버 사이드에서 이미지를 회전시킵니다
- * @param base64 - base64 인코딩된 이미지 데이터
- * @param degrees - 회전 각도 (0, 90, 180, 270)
- * @returns Promise<string> - 회전된 이미지의 base64 데이터
+ * Determines the number of pages in a PDF (falls back to 1 when the count cannot be read)
*/
-export async function rotateImageBase64(base64: string, degrees: number): Promise<string> {
+export async function getPDFPageCount(pdfBuffer: Buffer): Promise<number> {
+ // The external commands below are given a file path, so the buffer is first
+ // written to a uniquely named file in the OS temp directory.
+ const tmp = tmpdir();
+ const id = randomUUID();
+ const pdfPath = path.join(tmp, `${id}.pdf`);
+
try {
- console.log(`🔄 Rotating image by ${degrees} degrees...`);
+ await fs.writeFile(pdfPath, pdfBuffer);
- // base64를 Buffer로 변환
- const inputBuffer = Buffer.from(base64, 'base64');
+ // Determine the page count with the pdfinfo command
+ const { stdout } = await exec('pdfinfo', [pdfPath]);
+ const pageMatch = stdout.match(/Pages:\s+(\d+)/);
+ // Falls back to 1 when no "Pages:" line is found in the pdfinfo output.
+ const pageCount = pageMatch ? parseInt(pageMatch[1]) : 1;
- // 회전 각도에 따른 처리
- let rotatedBuffer: Buffer;
-
- switch (degrees) {
- case 0:
- // 회전 없음
- rotatedBuffer = inputBuffer;
- break;
-
- case 90:
- rotatedBuffer = await sharp(inputBuffer)
- .rotate(90)
- .jpeg({
- quality: 90,
- progressive: true
- })
- .toBuffer();
- break;
-
- case 180:
- rotatedBuffer = await sharp(inputBuffer)
- .rotate(180)
- .jpeg({
- quality: 90,
- progressive: true
- })
- .toBuffer();
- break;
-
- case 270:
- rotatedBuffer = await sharp(inputBuffer)
- .rotate(270)
- .jpeg({
- quality: 90,
- progressive: true
- })
- .toBuffer();
- break;
-
- default:
- console.warn(`⚠️ Unsupported rotation angle: ${degrees}°. Using original image.`);
- rotatedBuffer = inputBuffer;
- }
-
- // Buffer를 다시 base64로 변환
- const rotatedBase64 = rotatedBuffer.toString('base64');
-
- console.log(`✅ Image rotated successfully (${degrees}°)`);
- return rotatedBase64;
+ console.log(`📄 PDF has ${pageCount} pages`);
+ return pageCount;
} catch (error) {
- console.error(`❌ Error rotating image by ${degrees}°:`, error);
- console.warn('Using original image due to rotation error');
- return base64; // 실패시 원본 반환
+ console.warn('❌ Could not get PDF page count, trying alternative method:', error);
+
+ // If pdfinfo fails, probe with pdftoppm instead
+ // NOTE(review): both outcomes of this probe return 1, so it only affects what is logged.
+ try {
+ await exec('pdftoppm', ['-l', '1', '-null', pdfPath]);
+ return 1; // at least one page exists
+ } catch {
+ console.warn('⚠️ Could not determine page count, assuming 1 page');
+ return 1;
+ }
+ } finally {
+ // Remove the temp file on every exit path; a failed removal is ignored.
+ await fs.rm(pdfPath, { force: true }).catch(() => {});
}
}
/**
- * 이미지 품질을 개선합니다
- * @param base64 - base64 인코딩된 이미지 데이터
- * @returns Promise<string> - 개선된 이미지의 base64 데이터
+ * Validates base64 image data (format, minimum length, decoded size up to 50MB)
*/
-export async function enhanceImageQuality(base64: string): Promise<string> {
- try {
- console.log('🎨 Enhancing image quality...');
-
- const inputBuffer = Buffer.from(base64, 'base64');
-
- const enhancedBuffer = await sharp(inputBuffer)
- .resize(2000, 2000, {
- fit: 'inside',
- withoutEnlargement: true
- })
- // 개별 매개변수 방식으로 수정
- .sharpen(1, 1, 2) // sigma, m1(flat), m2(jagged)
- .normalize() // 히스토그램 정규화
- .gamma(1.1) // 약간의 감마 보정
- .jpeg({
- quality: 95,
- progressive: true,
- mozjpeg: true
- })
- .toBuffer();
-
- const enhancedBase64 = enhancedBuffer.toString('base64');
-
- console.log('✅ Image quality enhanced');
- return enhancedBase64;
-
- } catch (error) {
- console.error('❌ Error enhancing image:', error);
- return base64;
- }
+export function validateBase64Image(base64: string): { isValid: boolean; error?: string; size?: number } {
+  // Checks that `base64` is a plausible base64 image payload. A leading
+  // `data:image/<type>;base64,` prefix is ignored. Returns `{ isValid: true, size }`
+  // (size = decoded byte count) or `{ isValid: false, error }`; it never throws.
+  const maxBytes = 52428800; // 50MB
+  const minChars = 100;
+  const reject = (error: string) => ({ isValid: false, error });
+
+  try {
+    if (typeof base64 !== 'string' || !base64) {
+      return reject('Base64 data is empty or invalid type');
+    }
+
+    const payload = base64.replace(/^data:image\/[a-z]+;base64,/, '');
+
+    // Only the base64 alphabet plus up to two trailing '=' is accepted.
+    const looksLikeBase64 = /^[A-Za-z0-9+/]*={0,2}$/.test(payload);
+    if (!looksLikeBase64) {
+      return reject('Invalid base64 format');
+    }
+
+    if (payload.length < minChars) {
+      return reject(`Base64 too short: ${payload.length} characters`);
+    }
+
+    const size = Buffer.from(payload, 'base64').length;
+
+    if (size < 1) {
+      return reject(`Buffer too small: ${size} bytes`);
+    }
+    if (size > maxBytes) {
+      return reject(`Buffer too large: ${size} bytes (max: 50MB)`);
+    }
+
+    return { isValid: true, size };
+  } catch (err) {
+    const reason = err instanceof Error ? err.message : 'Unknown error';
+    return reject(`Base64 validation failed: ${reason}`);
+  }
+}
/**
- * PDF를 고품질 이미지로 변환합니다
- * @param pdfBuffer - PDF Buffer 데이터
- * @param pageIndex - 변환할 페이지 인덱스 (0부터 시작)
- * @returns Promise<string> - 변환된 이미지의 base64 데이터
+ * Rotates an image on the server side
*/
-export async function convertPDFToImage(pdfBuffer: Buffer, pageIndex: number = 0): Promise<string> {
- try {
- console.log(`📄 Converting PDF page ${pageIndex + 1} to image...`);
-
- // pdf2pic 라이브러리 사용
- const pdf2pic = require('pdf2pic');
-
- const convert = pdf2pic.fromBuffer(pdfBuffer, {
- density: 300, // 300 DPI for high quality
- saveFilename: "page",
- savePath: "/tmp", // 임시 경로
- format: "jpeg",
- width: 2480, // A4 크기 @ 300 DPI
- height: 3508,
- quality: 100
- });
-
- const result = await convert(pageIndex + 1, { responseType: "buffer" });
- const base64 = result.buffer.toString('base64');
-
- console.log('✅ PDF converted to image successfully');
- return base64;
-
- } catch (error) {
- console.error('❌ Error converting PDF to image:', error);
- throw new Error('Failed to convert PDF to image');
- }
+export async function rotateImageBase64(base64: string, degrees: number): Promise<string> {
+  // Contract:
+  //  - `base64` may be bare base64 or carry a `data:image/<type>;base64,` prefix.
+  //  - `degrees` is normalized into [0, 360); 0 re-encodes without rotating.
+  //  - Resolves to bare base64 (never a data URL): the re-encoded JPEG on success,
+  //    or the original image payload when processing fails but the input is valid.
+  //  - Throws only when processing fails AND the input itself does not validate.
+  const maxOutputBytes = 52428800; // 50MB, the same ceiling validateBase64Image applies
+  const dataUrlPrefix = /^data:image\/[a-z]+;base64,/;
+
+  try {
+    console.log(`🔄 === ROTATING IMAGE BY ${degrees}° ===`);
+
+    const validation = validateBase64Image(base64);
+    if (!validation.isValid) {
+      throw new Error(`Invalid input base64: ${validation.error}`);
+    }
+
+    console.log(`✅ Input validation passed - size: ${validation.size} bytes`);
+
+    const inputBuffer = Buffer.from(base64.replace(dataUrlPrefix, ''), 'base64');
+
+    console.log(`📊 Input buffer created: ${inputBuffer.length} bytes`);
+
+    // Map negative and >360 angles onto [0, 360).
+    const normalizedDegrees = ((degrees % 360) + 360) % 360;
+    console.log(`📐 Normalized rotation: ${normalizedDegrees}°`);
+
+    let pipeline = sharp(inputBuffer);
+
+    if (normalizedDegrees === 0) {
+      console.log(' ↻ No rotation needed, applying quality enhancement...');
+    } else {
+      console.log(` 🔄 Applying ${normalizedDegrees}° rotation...`);
+
+      const metadata = await pipeline.metadata();
+      console.log(` 📏 Original image: ${metadata.width}x${metadata.height}, format: ${metadata.format}`);
+
+      pipeline = pipeline.rotate(normalizedDegrees);
+    }
+
+    // Both paths share one JPEG encode step.
+    const rotatedBuffer = await pipeline
+      .jpeg({
+        quality: 90,
+        progressive: true,
+        mozjpeg: true
+      })
+      .toBuffer();
+
+    console.log(`📊 Output buffer created: ${rotatedBuffer.length} bytes`);
+
+    if (rotatedBuffer.length === 0) {
+      throw new Error('Rotation resulted in empty buffer');
+    }
+
+    if (rotatedBuffer.length > maxOutputBytes) {
+      throw new Error(`Rotated image too large: ${rotatedBuffer.length} bytes`);
+    }
+
+    const rotatedBase64 = rotatedBuffer.toString('base64');
+
+    const outputValidation = validateBase64Image(rotatedBase64);
+    if (!outputValidation.isValid) {
+      throw new Error(`Invalid output base64: ${outputValidation.error}`);
+    }
+
+    console.log(`✅ Image rotated successfully: ${outputValidation.size} bytes`);
+    console.log(`📈 Size change: ${inputBuffer.length} → ${outputValidation.size} bytes`);
+
+    return rotatedBase64;
+
+  } catch (error) {
+    console.error(`❌ Error rotating image by ${degrees}°:`, error);
+
+    // Extra hints for a few failure messages this code recognizes.
+    if (error instanceof Error) {
+      if (error.message.includes('Input buffer contains unsupported image format')) {
+        console.error(' 🖼️ Unsupported image format - try converting to JPEG first');
+      } else if (error.message.includes('Input image exceeds pixel limit')) {
+        console.error(' 📏 Image too large for processing');
+      } else if (error.message.includes('premature close')) {
+        console.error(' 🔧 Corrupted image data');
+      }
+    }
+
+    const originalValidation = validateBase64Image(base64);
+    if (!originalValidation.isValid) {
+      throw new Error(`Rotation failed and original image is invalid: ${originalValidation.error}`);
+    }
+
+    console.warn(' ↩️ Using original image due to rotation error');
+    // Strip any data-URL prefix so the fallback has the same shape (bare base64)
+    // as the success path; callers must not have to handle two formats.
+    return base64.replace(dataUrlPrefix, '');
+  }
+}
/**
- * 이미지에서 텍스트 방향을 감지합니다
- * @param base64 - base64 인코딩된 이미지 데이터
- * @returns Promise<number> - 감지된 올바른 회전 각도
+ * Enhances image quality
*/
-export async function detectTextOrientation(base64: string): Promise<number> {
- // 이 함수는 간단한 방향 감지를 시뮬레이션합니다
- // 실제로는 더 정교한 알고리즘이 필요할 수 있습니다
-
- console.log('🧭 Detecting text orientation...');
-
- const rotations = [0, 90, 180, 270];
- const scores: { rotation: number; score: number }[] = [];
-
- for (const rotation of rotations) {
+export async function enhanceImageQuality(base64: string): Promise<string> {
try {
- const rotatedBase64 = await rotateImageBase64(base64, rotation);
-
- // 간단한 품질 측정 (실제로는 OCR API 호출이나 다른 방법 사용)
- const score = await estimateTextQuality(rotatedBase64);
- scores.push({ rotation, score });
-
- console.log(` ${rotation}°: quality score = ${score.toFixed(3)}`);
-
+ console.log('🎨 === ENHANCING IMAGE QUALITY ===');
+
+ // Best-effort: invalid input is returned unchanged instead of throwing.
+ const validation = validateBase64Image(base64);
+ if (!validation.isValid) {
+ console.warn(`⚠️ Invalid input for enhancement: ${validation.error}`);
+ return base64;
+ }
+
+ console.log(`✅ Enhancement input valid: ${validation.size} bytes`);
+
+ // Drop an optional data-URL prefix before decoding.
+ const cleanBase64 = base64.replace(/^data:image\/[a-z]+;base64,/, '');
+ const inputBuffer = Buffer.from(cleanBase64, 'base64');
+
+ const sharpInstance = sharp(inputBuffer);
+ const metadata = await sharpInstance.metadata();
+
+ console.log(`📏 Original: ${metadata.width}x${metadata.height}, ${metadata.format}`);
+
+ // Resize only when both dimensions are known and one exceeds maxDimension.
+ const maxDimension = 2000;
+ let needsResize = false;
+
+ if (metadata.width && metadata.height) {
+ needsResize = metadata.width > maxDimension || metadata.height > maxDimension;
+ }
+
+ let enhancedBuffer: Buffer;
+
+ // The two branches differ only in the resize step; sharpen, normalize,
+ // gamma and the JPEG encode settings are the same in both.
+ if (needsResize) {
+ console.log(`📐 Resizing to fit ${maxDimension}px...`);
+ enhancedBuffer = await sharpInstance
+ .resize(maxDimension, maxDimension, {
+ fit: 'inside',
+ withoutEnlargement: true
+ })
+ .sharpen(0.5, 1, 2)
+ .normalize()
+ .gamma(1.1)
+ .jpeg({
+ quality: 95,
+ progressive: true,
+ mozjpeg: true
+ })
+ .toBuffer();
+ } else {
+ console.log('📐 No resize needed, applying enhancement only...');
+ enhancedBuffer = await sharpInstance
+ .sharpen(0.5, 1, 2)
+ .normalize()
+ .gamma(1.1)
+ .jpeg({
+ quality: 95,
+ progressive: true,
+ mozjpeg: true
+ })
+ .toBuffer();
+ }
+
+ const enhancedBase64 = enhancedBuffer.toString('base64');
+
+ // If the encoded result does not validate, return the caller's original string.
+ const outputValidation = validateBase64Image(enhancedBase64);
+ if (!outputValidation.isValid) {
+ console.warn(`⚠️ Enhancement resulted in invalid image: ${outputValidation.error}`);
+ return base64;
+ }
+
+ console.log(`✅ Image enhanced: ${validation.size} → ${outputValidation.size} bytes`);
+ return enhancedBase64;
+
} catch (error) {
- console.warn(` ${rotation}°: Failed to test orientation`);
- scores.push({ rotation, score: 0 });
+ // Any processing error also falls back to the original input.
+ console.error('❌ Error enhancing image:', error);
+ return base64;
}
- }
-
- const bestOrientation = scores.reduce((best, current) =>
- current.score > best.score ? current : best
- );
-
- console.log(`🎯 Best orientation detected: ${bestOrientation.rotation}°`);
- return bestOrientation.rotation;
}
/**
- * 이미지의 텍스트 품질을 추정합니다 (간단한 버전)
+ * Converts a PDF page to an image (improved version)
*/
-async function estimateTextQuality(base64: string): Promise<number> {
+export async function convertPDFToImage(
+ pdfBuffer: Buffer,
+ pageIndex = 0,
+ dpi = 300,
+): Promise<string> {
+ // Input PDF and output JPEG share one random id inside the OS temp directory.
+ const tmp = tmpdir();
+ const id = randomUUID();
+ const pdfPath = path.join(tmp, `${id}.pdf`);
+ const outPrefix = path.join(tmp, id);
+ // The result is read from `<outPrefix>.jpg` — presumably where pdftoppm writes
+ // with -jpeg -singlefile; confirm against the installed pdftoppm.
+ const jpgPath = `${outPrefix}.jpg`;
+
try {
- const buffer = Buffer.from(base64, 'base64');
+ console.log(`📄 Converting PDF page ${pageIndex + 1} to image (${dpi} DPI)...`);
- // Sharp을 사용해 이미지 통계 분석
- const stats = await sharp(buffer)
- .greyscale()
- .stats();
-
- // 간단한 품질 지표 계산
- // 실제로는 더 복잡한 알고리즘이 필요합니다
- const contrast = stats.channels[0].max - stats.channels[0].min;
- const sharpness = stats.channels[0].stdev;
+ // 1) Save the PDF to a temporary file
+ await fs.writeFile(pdfPath, pdfBuffer);
+
+ // 2) Run pdftoppm
+ const page = pageIndex + 1; // pdftoppm page numbers are 1-based
+ await exec('pdftoppm', [
+ '-jpeg',
+ '-singlefile',
+ '-r', dpi.toString(),
+ '-f', page.toString(),
+ '-l', page.toString(),
+ pdfPath,
+ outPrefix,
+ ], { maxBuffer: 1024 * 1024 * 50 }); // 50MB buffer
+
+ // 3) Read the result and convert it to base64
+ const img = await fs.readFile(jpgPath);
+ const base64 = img.toString('base64');
- return (contrast + sharpness) / 510; // 0-1 범위로 정규화
+ console.log(`✅ PDF page ${pageIndex + 1} converted successfully: ${img.length} bytes`);
+ return base64;
+
} catch (error) {
- return 0;
+ // Failures are logged and rethrown as a new Error that names the page.
+ console.error(`❌ Error converting PDF page ${pageIndex + 1}:`, error);
+ throw new Error(`Failed to convert PDF page ${pageIndex + 1}: ${error instanceof Error ? error.message : 'Unknown error'}`);
+ } finally {
+ // 4) Clean up temporary files
+ await fs.rm(pdfPath, { force: true }).catch(() => {});
+ await fs.rm(jpgPath, { force: true }).catch(() => {});
}
}
/**
- * 이미지가 회전이 필요한지 빠르게 체크합니다
- * @param base64 - base64 인코딩된 이미지 데이터
- * @returns Promise<boolean> - 회전이 필요하면 true
+ * Normalizes the image format to JPEG
*/
-export async function needsRotation(base64: string): Promise<boolean> {
- try {
- const buffer = Buffer.from(base64, 'base64');
-
- // 이미지 메타데이터 확인
- const metadata = await sharp(buffer).metadata();
-
- // EXIF 방향 정보가 있으면 회전 필요
- if (metadata.orientation && metadata.orientation > 1) {
- console.log(`📐 EXIF orientation detected: ${metadata.orientation}`);
- return true;
+export async function normalizeImageFormat(base64: string): Promise<string> {
+ try {
+ console.log('🔄 Normalizing image format to JPEG...');
+
+ // Unlike the best-effort helpers in this file, invalid input is an error here.
+ const validation = validateBase64Image(base64);
+ if (!validation.isValid) {
+ throw new Error(`Cannot normalize invalid image: ${validation.error}`);
+ }
+
+ // Drop an optional data-URL prefix before decoding.
+ const cleanBase64 = base64.replace(/^data:image\/[a-z]+;base64,/, '');
+ const inputBuffer = Buffer.from(cleanBase64, 'base64');
+
+ // Re-encode as progressive JPEG at quality 90; no other transform is applied.
+ const normalizedBuffer = await sharp(inputBuffer)
+ .jpeg({
+ quality: 90,
+ progressive: true
+ })
+ .toBuffer();
+
+ const normalizedBase64 = normalizedBuffer.toString('base64');
+
+ // The encoded output must itself pass validation before it is returned.
+ const outputValidation = validateBase64Image(normalizedBase64);
+ if (!outputValidation.isValid) {
+ throw new Error(`Normalization failed: ${outputValidation.error}`);
+ }
+
+ console.log(`✅ Format normalized: ${validation.size} → ${outputValidation.size} bytes`);
+ return normalizedBase64;
+
+ } catch (error) {
+ // Log, then rethrow the same error to the caller.
+ console.error('❌ Error normalizing image format:', error);
+ throw error;
}
-
- // 가로/세로 비율 체크 (일반적으로 문서는 세로가 더 긺)
- if (metadata.width && metadata.height) {
- const aspectRatio = metadata.width / metadata.height;
- if (aspectRatio > 1.5) {
- console.log(`📐 Wide aspect ratio detected: ${aspectRatio.toFixed(2)}`);
- return true; // 너무 가로로 긴 이미지는 회전 필요할 가능성
- }
+}
+
+// Existing functions
+/**
+ * Tries the image at 0°, 90°, 180° and 270° and returns the angle whose rotated
+ * copy gets the highest score from `estimateTextQuality`. A candidate that fails
+ * is scored 0; ties keep the earlier angle.
+ */
+export async function detectTextOrientation(base64: string): Promise<number> {
+  console.log('🧭 Detecting text orientation...');
+
+  const results: { rotation: number; score: number }[] = [];
+
+  // Candidates are evaluated one after another, in ascending angle order.
+  for (const angle of [0, 90, 180, 270]) {
+    try {
+      const candidate = await rotateImageBase64(base64, angle);
+      const quality = await estimateTextQuality(candidate);
+      results.push({ rotation: angle, score: quality });
+
+      console.log(` ${angle}°: quality score = ${quality.toFixed(3)}`);
+    } catch {
+      console.warn(` ${angle}°: Failed to test orientation`);
+      results.push({ rotation: angle, score: 0 });
+    }
+  }
+
+  // Strict '>' means the first of several equal scores wins.
+  const winner = results.reduce((leader, entry) => {
+    if (entry.score > leader.score) {
+      return entry;
+    }
+    return leader;
+  });
+
+  console.log(`🎯 Best orientation detected: ${winner.rotation}°`);
+  return winner.rotation;
+}
+
+/**
+ * Rough quality score for an image: (max - min + stdev) of the first greyscale
+ * channel, divided by 510. Returns 0 if the image cannot be analysed.
+ */
+async function estimateTextQuality(base64: string): Promise<number> {
+  try {
+    // Accept either bare base64 or a data URL.
+    const payload = base64.replace(/^data:image\/[a-z]+;base64,/, '');
+    const pixels = Buffer.from(payload, 'base64');
+
+    const { channels } = await sharp(pixels).greyscale().stats();
+
+    const spread = channels[0].max - channels[0].min;
+    const deviation = channels[0].stdev;
+
+    return (spread + deviation) / 510;
+  } catch {
+    return 0;
+  }
+}
+
+// Quick heuristic for whether an image probably needs rotating: true when the
+// metadata carries an orientation value above 1, or when width/height exceeds 1.5.
+// Returns false when neither applies or when the metadata cannot be read.
+export async function needsRotation(base64: string): Promise<boolean> {
+ try {
+ // Drop an optional data-URL prefix before decoding.
+ const cleanBase64 = base64.replace(/^data:image\/[a-z]+;base64,/, '');
+ const buffer = Buffer.from(cleanBase64, 'base64');
+
+ const metadata = await sharp(buffer).metadata();
+
+ // An orientation value greater than 1 is treated as "needs rotation".
+ if (metadata.orientation && metadata.orientation > 1) {
+ console.log(`📐 EXIF orientation detected: ${metadata.orientation}`);
+ return true;
+ }
+
+ // Images noticeably wider than tall (ratio above 1.5) are also flagged.
+ if (metadata.width && metadata.height) {
+ const aspectRatio = metadata.width / metadata.height;
+ if (aspectRatio > 1.5) {
+ console.log(`📐 Wide aspect ratio detected: ${aspectRatio.toFixed(2)}`);
+ return true;
+ }
+ }
+
+ return false;
+
+ } catch (error) {
+ // Unreadable images are reported as not needing rotation.
+ console.warn('Error checking if rotation needed:', error);
+ return false;
}
-
- return false;
-
- } catch (error) {
- console.warn('Error checking if rotation needed:', error);
- return false;
- }
} \ No newline at end of file