diff options
Diffstat (limited to 'app/api/ocr/utils/imageRotation.ts')
| -rw-r--r-- | app/api/ocr/utils/imageRotation.ts | 583 |
1 files changed, 383 insertions, 200 deletions
// ============================================================================
// app/api/ocr/utils/imageRotation.ts
// Image rotation utilities, including PDF page handling
// ============================================================================

import sharp from 'sharp';
import { promises as fs } from 'fs';
import path from 'path';
import { randomUUID } from 'crypto';
import { execFile } from 'child_process';
import { promisify } from 'util';
import { tmpdir } from 'os';

const exec = promisify(execFile);

/** Largest decoded image accepted or produced by this module, in bytes (50MB). */
const MAX_IMAGE_BYTES = 50 * 1024 * 1024;

/** Shortest base64 payload (in characters) that is still treated as an image. */
const MIN_BASE64_LENGTH = 100;

/** Longest side, in pixels, that `enhanceImageQuality` lets an image keep. */
const MAX_ENHANCED_DIMENSION = 2000;

/** Rotations, in degrees, that `detectTextOrientation` tries. */
const CANDIDATE_ROTATIONS = [0, 90, 180, 270] as const;

/**
 * Matches a leading `data:image/<subtype>;base64,` header.
 * Case-insensitive and tolerant of subtypes such as `svg+xml` or `x-icon`.
 */
const DATA_URL_PREFIX = /^data:image\/[a-z0-9.+-]+;base64,/i;

/** Removes a leading image data-URL header, leaving only the base64 payload. */
function stripDataUrlPrefix(base64: string): string {
  return base64.replace(DATA_URL_PREFIX, '');
}

/**
 * Validates a base64 image string and decodes it.
 *
 * @throws Error when `validateBase64Image` rejects the input.
 */
function decodeImage(base64: string): Buffer {
  const validation = validateBase64Image(base64);
  if (!validation.isValid) {
    throw new Error(`Invalid input base64: ${validation.error}`);
  }
  return Buffer.from(stripDataUrlPrefix(base64), 'base64');
}

/**
 * Rotates an image and re-encodes it as a progressive JPEG (quality 90).
 * Unlike `rotateImageBase64`, this helper lets sharp errors propagate.
 *
 * @param input - Encoded image bytes.
 * @param degrees - Rotation angle; `0` skips the rotate step and only re-encodes.
 */
async function rotateToJpeg(input: Buffer, degrees: number): Promise<Buffer> {
  const image = sharp(input);
  if (degrees !== 0) {
    image.rotate(degrees);
  }
  return image
    .jpeg({
      quality: 90,
      progressive: true,
      mozjpeg: true,
    })
    .toBuffer();
}

/**
 * Returns the number of pages in a PDF.
 *
 * The buffer is written to a temporary file and inspected with the `pdfinfo`
 * command. If anything fails (file write, missing binary, unparsable output)
 * the function logs a warning and returns 1 instead of throwing.
 *
 * @param pdfBuffer - Raw PDF bytes.
 * @returns The page count reported by `pdfinfo`, or 1 when it cannot be determined.
 */
export async function getPDFPageCount(pdfBuffer: Buffer): Promise<number> {
  const pdfPath = path.join(tmpdir(), `${randomUUID()}.pdf`);

  try {
    await fs.writeFile(pdfPath, pdfBuffer);

    // Ask pdfinfo for the document summary and pick out the "Pages:" line.
    const { stdout } = await exec('pdfinfo', [pdfPath]);
    const pagesText = /Pages:\s+(\d+)/.exec(stdout)?.[1];
    const pageCount = pagesText ? parseInt(pagesText, 10) : 1;

    console.log(`📄 PDF has ${pageCount} pages`);
    return pageCount;
  } catch (error) {
    // The previous pdftoppm probe returned 1 whether it succeeded or failed,
    // so it is skipped: the answer here is always "assume a single page".
    console.warn('⚠️ Could not determine page count, assuming 1 page:', error);
    return 1;
  } finally {
    await fs.rm(pdfPath, { force: true }).catch(() => {});
  }
}

/**
 * Checks that a string looks like a usable base64-encoded image.
 *
 * An optional `data:image/...;base64,` header is ignored. The payload must use
 * the standard base64 alphabet, be at least 100 characters long and decode to
 * at most 50MB. The decoded bytes are NOT checked for being a real image.
 *
 * @param base64 - Base64 string, with or without a data-URL header.
 * @returns `{ isValid: true, size }` with the decoded byte length, or
 *          `{ isValid: false, error }` describing the first failed check.
 */
export function validateBase64Image(base64: string): { isValid: boolean; error?: string; size?: number } {
  try {
    if (!base64 || typeof base64 !== 'string') {
      return { isValid: false, error: 'Base64 data is empty or invalid type' };
    }

    const cleanBase64 = stripDataUrlPrefix(base64);

    if (!/^[A-Za-z0-9+/]*={0,2}$/.test(cleanBase64)) {
      return { isValid: false, error: 'Invalid base64 format' };
    }

    if (cleanBase64.length < MIN_BASE64_LENGTH) {
      return { isValid: false, error: `Base64 too short: ${cleanBase64.length} characters` };
    }

    // A payload of MIN_BASE64_LENGTH+ alphabet characters always decodes to a
    // non-empty buffer, so only the upper bound needs checking.
    const bufferSize = Buffer.from(cleanBase64, 'base64').length;

    if (bufferSize > MAX_IMAGE_BYTES) {
      return { isValid: false, error: `Buffer too large: ${bufferSize} bytes (max: 50MB)` };
    }

    return { isValid: true, size: bufferSize };
  } catch (error) {
    return {
      isValid: false,
      error: `Base64 validation failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
    };
  }
}

/**
 * Rotates a base64-encoded image on the server and returns it as base64 JPEG.
 *
 * The angle is normalized into the range [0, 360). A rotation of 0 still
 * re-encodes the image as JPEG. If rotation fails but the input itself passes
 * validation, the input payload is returned unrotated (without any data-URL
 * header, matching the success path).
 *
 * @param base64 - Base64 image, with or without a data-URL header.
 * @param degrees - Rotation angle in degrees; negative values are allowed.
 * @returns Raw base64 (no data-URL header) of the rotated JPEG, or of the
 *          original image when rotation failed.
 * @throws Error when rotation fails and the input is not a valid base64 image.
 */
export async function rotateImageBase64(base64: string, degrees: number): Promise<string> {
  try {
    console.log(`🔄 === ROTATING IMAGE BY ${degrees}° ===`);

    const inputBuffer = decodeImage(base64);
    console.log(`✅ Input validation passed - size: ${inputBuffer.length} bytes`);
    console.log(`📊 Input buffer created: ${inputBuffer.length} bytes`);

    // Map any angle (including negatives) into [0, 360).
    const normalizedDegrees = ((degrees % 360) + 360) % 360;
    console.log(`📐 Normalized rotation: ${normalizedDegrees}°`);

    if (normalizedDegrees === 0) {
      console.log('   ↻ No rotation needed, applying quality enhancement...');
    } else {
      console.log(`   🔄 Applying ${normalizedDegrees}° rotation...`);
      const metadata = await sharp(inputBuffer).metadata();
      console.log(`   📏 Original image: ${metadata.width}x${metadata.height}, format: ${metadata.format}`);
    }

    const rotatedBuffer = await rotateToJpeg(inputBuffer, normalizedDegrees);
    console.log(`📊 Output buffer created: ${rotatedBuffer.length} bytes`);

    if (rotatedBuffer.length === 0) {
      throw new Error('Rotation resulted in empty buffer');
    }

    if (rotatedBuffer.length > MAX_IMAGE_BYTES) {
      throw new Error(`Rotated image too large: ${rotatedBuffer.length} bytes`);
    }

    const rotatedBase64 = rotatedBuffer.toString('base64');

    const outputValidation = validateBase64Image(rotatedBase64);
    if (!outputValidation.isValid) {
      throw new Error(`Invalid output base64: ${outputValidation.error}`);
    }

    console.log(`✅ Image rotated successfully: ${outputValidation.size} bytes`);
    console.log(`📈 Size change: ${inputBuffer.length} → ${outputValidation.size} bytes`);

    return rotatedBase64;
  } catch (error) {
    console.error(`❌ Error rotating image by ${degrees}°:`, error);

    if (error instanceof Error) {
      // Translate the sharp error texts this module knows about into hints.
      const hints: ReadonlyArray<readonly [string, string]> = [
        ['Input buffer contains unsupported image format', '   🖼️ Unsupported image format - try converting to JPEG first'],
        ['Input image exceeds pixel limit', '   📏 Image too large for processing'],
        ['premature close', '   🔧 Corrupted image data'],
      ];
      const message = error.message;
      const hint = hints.find(([needle]) => message.includes(needle));
      if (hint) {
        console.error(hint[1]);
      }
    }

    const originalValidation = validateBase64Image(base64);
    if (!originalValidation.isValid) {
      throw new Error(`Rotation failed and original image is invalid: ${originalValidation.error}`);
    }

    console.warn('   ↩️ Using original image due to rotation error');
    // Strip any data-URL header so the fallback has the same shape as a
    // successful result (raw base64).
    return stripDataUrlPrefix(base64);
  }
}

/**
 * Improves an image for downstream processing and returns it as base64 JPEG.
 *
 * Images whose width or height exceeds 2000px are scaled down to fit inside
 * 2000x2000; every image is then sharpened, normalized, gamma-corrected (1.1)
 * and encoded as progressive JPEG at quality 95. The function never throws:
 * invalid input is returned unchanged, and a processing failure returns the
 * input payload without its data-URL header.
 *
 * @param base64 - Base64 image, with or without a data-URL header.
 * @returns Raw base64 of the enhanced JPEG, or the input as described above.
 */
export async function enhanceImageQuality(base64: string): Promise<string> {
  console.log('🎨 === ENHANCING IMAGE QUALITY ===');

  const validation = validateBase64Image(base64);
  if (!validation.isValid) {
    console.warn(`⚠️ Invalid input for enhancement: ${validation.error}`);
    return base64;
  }

  console.log(`✅ Enhancement input valid: ${validation.size} bytes`);

  const cleanBase64 = stripDataUrlPrefix(base64);

  try {
    const inputBuffer = Buffer.from(cleanBase64, 'base64');
    const metadata = await sharp(inputBuffer).metadata();

    console.log(`📏 Original: ${metadata.width}x${metadata.height}, ${metadata.format}`);

    // Resize only when both dimensions are known and one is over the limit.
    const needsResize =
      !!metadata.width &&
      !!metadata.height &&
      (metadata.width > MAX_ENHANCED_DIMENSION || metadata.height > MAX_ENHANCED_DIMENSION);

    const pipeline = sharp(inputBuffer);
    if (needsResize) {
      console.log(`📐 Resizing to fit ${MAX_ENHANCED_DIMENSION}px...`);
      pipeline.resize(MAX_ENHANCED_DIMENSION, MAX_ENHANCED_DIMENSION, {
        fit: 'inside',
        withoutEnlargement: true,
      });
    } else {
      console.log('📐 No resize needed, applying enhancement only...');
    }

    const enhancedBuffer = await pipeline
      .sharpen(0.5, 1, 2) // sigma, flat-area level, jagged-area level
      .normalize() // stretch the histogram
      .gamma(1.1) // slight gamma correction
      .jpeg({
        quality: 95,
        progressive: true,
        mozjpeg: true,
      })
      .toBuffer();

    const enhancedBase64 = enhancedBuffer.toString('base64');

    const outputValidation = validateBase64Image(enhancedBase64);
    if (!outputValidation.isValid) {
      console.warn(`⚠️ Enhancement resulted in invalid image: ${outputValidation.error}`);
      return cleanBase64;
    }

    console.log(`✅ Image enhanced: ${validation.size} → ${outputValidation.size} bytes`);
    return enhancedBase64;
  } catch (error) {
    console.error('❌ Error enhancing image:', error);
    // Same shape as the success path: raw base64 without a data-URL header.
    return cleanBase64;
  }
}

/**
 * Renders one page of a PDF to a JPEG using the `pdftoppm` command.
 *
 * The PDF is written to a temporary file, rendered with `-singlefile`, read
 * back and base64-encoded. Temporary files are removed afterwards whether or
 * not the conversion succeeded.
 *
 * @param pdfBuffer - Raw PDF bytes.
 * @param pageIndex - Zero-based page index (default 0).
 * @param dpi - Render resolution in DPI (default 300).
 * @returns Raw base64 of the rendered JPEG.
 * @throws Error when `pageIndex`/`dpi` are out of range or the conversion fails.
 */
export async function convertPDFToImage(
  pdfBuffer: Buffer,
  pageIndex = 0,
  dpi = 300,
): Promise<string> {
  // Reject bad arguments before touching the filesystem or spawning a process.
  if (!Number.isInteger(pageIndex) || pageIndex < 0) {
    throw new Error(`Failed to convert PDF page: pageIndex must be a non-negative integer, got ${pageIndex}`);
  }
  if (!Number.isFinite(dpi) || dpi <= 0) {
    throw new Error(`Failed to convert PDF page ${pageIndex + 1}: dpi must be a positive number, got ${dpi}`);
  }

  const outPrefix = path.join(tmpdir(), randomUUID());
  const pdfPath = `${outPrefix}.pdf`;
  const jpgPath = `${outPrefix}.jpg`;
  const page = pageIndex + 1; // pdftoppm page numbers are 1-based

  try {
    console.log(`📄 Converting PDF page ${page} to image (${dpi} DPI)...`);

    // 1) Store the PDF in a temporary file.
    await fs.writeFile(pdfPath, pdfBuffer);

    // 2) Render exactly one page; -singlefile makes the output `${outPrefix}.jpg`.
    await exec('pdftoppm', [
      '-jpeg',
      '-singlefile',
      '-r', dpi.toString(),
      '-f', page.toString(),
      '-l', page.toString(),
      pdfPath,
      outPrefix,
    ], { maxBuffer: MAX_IMAGE_BYTES });

    // 3) Read the result and convert it to base64.
    const img = await fs.readFile(jpgPath);
    const base64 = img.toString('base64');

    console.log(`✅ PDF page ${page} converted successfully: ${img.length} bytes`);
    return base64;
  } catch (error) {
    console.error(`❌ Error converting PDF page ${page}:`, error);
    throw new Error(`Failed to convert PDF page ${page}: ${error instanceof Error ? error.message : 'Unknown error'}`);
  } finally {
    // 4) Clean up temporary files; failures here are deliberately ignored.
    await Promise.all([
      fs.rm(pdfPath, { force: true }).catch(() => {}),
      fs.rm(jpgPath, { force: true }).catch(() => {}),
    ]);
  }
}

/**
 * Re-encodes an image as a progressive JPEG (quality 90).
 *
 * @param base64 - Base64 image, with or without a data-URL header.
 * @returns Raw base64 of the JPEG.
 * @throws Error when the input or the re-encoded output fails validation, or
 *         when sharp cannot process the image.
 */
export async function normalizeImageFormat(base64: string): Promise<string> {
  try {
    console.log('🔄 Normalizing image format to JPEG...');

    const validation = validateBase64Image(base64);
    if (!validation.isValid) {
      throw new Error(`Cannot normalize invalid image: ${validation.error}`);
    }

    const inputBuffer = Buffer.from(stripDataUrlPrefix(base64), 'base64');

    const normalizedBuffer = await sharp(inputBuffer)
      .jpeg({
        quality: 90,
        progressive: true,
      })
      .toBuffer();

    const normalizedBase64 = normalizedBuffer.toString('base64');

    const outputValidation = validateBase64Image(normalizedBase64);
    if (!outputValidation.isValid) {
      throw new Error(`Normalization failed: ${outputValidation.error}`);
    }

    console.log(`✅ Format normalized: ${validation.size} → ${outputValidation.size} bytes`);
    return normalizedBase64;
  } catch (error) {
    console.error('❌ Error normalizing image format:', error);
    throw error;
  }
}

/**
 * Picks the rotation (0, 90, 180 or 270) whose rotated image gets the highest
 * score from `estimateTextQuality`.
 *
 * A rotation that cannot be produced scores 0. Ties keep the earliest
 * candidate, so 0 is returned when nothing can be scored.
 *
 * NOTE(review): the score is derived from whole-image greyscale statistics
 * (min, max, standard deviation), which a 90°-step rotation should barely
 * change — this likely cannot tell orientations apart reliably; confirm and
 * consider an OCR-based score.
 *
 * @param base64 - Base64 image, with or without a data-URL header.
 * @returns The best-scoring rotation in degrees.
 */
export async function detectTextOrientation(base64: string): Promise<number> {
  console.log('🧭 Detecting text orientation...');

  const scores: { rotation: number; score: number }[] = [];

  for (const rotation of CANDIDATE_ROTATIONS) {
    try {
      // Use the throwing helper rather than rotateImageBase64, whose fallback
      // returns the unrotated image and would make a failed rotation look
      // like a successful one.
      const rotated = await rotateToJpeg(decodeImage(base64), rotation);
      const score = await estimateTextQuality(rotated.toString('base64'));
      scores.push({ rotation, score });

      console.log(`   ${rotation}°: quality score = ${score.toFixed(3)}`);
    } catch {
      console.warn(`   ${rotation}°: Failed to test orientation`);
      scores.push({ rotation, score: 0 });
    }
  }

  const bestOrientation = scores.reduce((best, current) =>
    current.score > best.score ? current : best
  );

  console.log(`🎯 Best orientation detected: ${bestOrientation.rotation}°`);
  return bestOrientation.rotation;
}

/**
 * Scores an image from the statistics of its greyscale channel.
 *
 * The score is `(max - min + stdev) / 510` of the first channel, i.e. value
 * range plus standard deviation scaled by a fixed constant.
 *
 * @param base64 - Base64 image, with or without a data-URL header.
 * @returns The score, or 0 when the image cannot be analysed.
 */
async function estimateTextQuality(base64: string): Promise<number> {
  try {
    const buffer = Buffer.from(stripDataUrlPrefix(base64), 'base64');

    const stats = await sharp(buffer)
      .greyscale()
      .stats();

    const channel = stats.channels[0];
    if (!channel) {
      return 0;
    }

    const contrast = channel.max - channel.min;
    const sharpness = channel.stdev;

    return (contrast + sharpness) / 510;
  } catch {
    return 0;
  }
}

/**
 * Quick heuristic for whether an image probably needs rotating.
 *
 * Returns true when the metadata carries an EXIF orientation greater than 1,
 * or when the image is more than 1.5 times wider than it is tall.
 *
 * @param base64 - Base64 image, with or without a data-URL header.
 * @returns `true` when rotation is likely needed; `false` otherwise, including
 *          when the metadata cannot be read.
 */
export async function needsRotation(base64: string): Promise<boolean> {
  try {
    const buffer = Buffer.from(stripDataUrlPrefix(base64), 'base64');

    const metadata = await sharp(buffer).metadata();

    // Any EXIF orientation other than 1 counts as needing rotation.
    if (metadata.orientation && metadata.orientation > 1) {
      console.log(`📐 EXIF orientation detected: ${metadata.orientation}`);
      return true;
    }

    // Much wider than tall: treated as a sign the image may be sideways.
    if (metadata.width && metadata.height) {
      const aspectRatio = metadata.width / metadata.height;
      if (aspectRatio > 1.5) {
        console.log(`📐 Wide aspect ratio detected: ${aspectRatio.toFixed(2)}`);
        return true;
      }
    }

    return false;
  } catch (error) {
    console.warn('Error checking if rotation needed:', error);
    return false;
  }
}
\ No newline at end of file |
