// app/api/ocr/utils/tableExtraction.ts
// Table extraction from OCR output: hardened Format-1 identification-number
// parsing and duplicate-row removal.

/* -------------------------------------------------------------------------- */
/*                                   Types                                    */
/* -------------------------------------------------------------------------- */

/** One data row recovered from an OCR table. Text fields are '' when nothing was found. */
export interface ExtractedRow {
  no: string;
  identificationNo: string;
  tagNo: string;
  jointNo: string;
  jointType: string;
  weldingDate: string;
  /** Weighted share of populated text fields, between 0 and 1 (see scoreRow). */
  confidence: number;
  /** Index of the table inside its image's `tables` array. */
  sourceTable: number;
  /** Row index inside the grid rebuilt from the table cells. */
  sourceRow: number;
}

interface TableCell {
  cellTextLines: Array<{
    cellWords: Array<{
      inferText: string;
      inferConfidence: number;
    }>;
  }>;
  rowIndex: number;
  columnIndex: number;
  rowSpan: number;
  columnSpan: number;
  inferConfidence: number;
}

interface OCRTable {
  cells: TableCell[];
  inferConfidence: number;
}

/** Column index for every logical field; -1 means "not mapped". */
interface ColumnMapping {
  no: number;
  identification: number;
  tagNo: number;
  jointNo: number;
  jointType: number;
  weldingDate: number;
}

type TableFormat = 'format1' | 'format2';

/* -------------------------------------------------------------------------- */
/*                         Shared constants & helpers                         */
/* -------------------------------------------------------------------------- */

/** The string-valued fields of ExtractedRow, in scoring/cleanup order. */
const TEXT_FIELDS = ['no', 'identificationNo', 'tagNo', 'jointNo', 'jointType', 'weldingDate'] as const;
type TextField = (typeof TEXT_FIELDS)[number];

/** Relative importance of each text field when computing the row confidence. */
const FIELD_WEIGHTS: Record<TextField, number> = {
  no: 1,
  identificationNo: 3,
  tagNo: 2,
  jointNo: 2,
  jointType: 1,
  weldingDate: 1,
};

// Pattern lists are tried in order; the first pattern that matches wins.
// String.prototype.match resets lastIndex for global regexes, so sharing is safe.
const ROW_NO_PATTERNS: readonly RegExp[] = [
  /\b(\d{3})\b/g, // three-digit number
  /\b(\d{2,4})\b/g, // two- to four-digit number
  /^(\d+)/, // leading digits
];

const ROW_ID_PATTERNS: readonly RegExp[] = [
  /[A-Za-z0-9]+-[A-Za-z0-9]+-[A-Za-z0-9\-]+/g,
  /-\d+[A-Za-z0-9]+-[A-Za-z0-9]+-[A-Za-z0-9]+/g,
  /\b[A-Z]\d+[A-Z]-\d+-\d+-[A-Z]+-\d+-[A-Z0-9]+-[A-Z]-[A-Z0-9]+\b/g,
];

const ROW_JOINT_PATTERNS: readonly RegExp[] = [
  /\b[A-Z]{2,4}\d*\b/g, // upper-case letters optionally followed by digits
  /\b[A-Za-z0-9]{2,6}\b/g, // any short alphanumeric token
];

const ROW_DATE_PATTERNS: readonly RegExp[] = [
  /\d{4}[.\-/]\d{1,2}[.\-/]\d{1,2}/g,
  /\d{4}\.\d{2}\.\d{2}/g,
];

const CELL_ID_PATTERNS: readonly RegExp[] = [
  /[A-Za-z0-9]+-[A-Za-z0-9]+-[A-Za-z0-9\-]+/g, // basic pattern
  /-\d+[A-Za-z0-9]+-[A-Za-z0-9]+-[A-Za-z0-9]+/g, // leading hyphen
  /\b[A-Za-z0-9]{2,}-[A-Za-z0-9]{2,}-[A-Za-z0-9]{2,}\b/g, // stricter pattern
];

/** Collapses line breaks, tabs and runs of whitespace into single spaces and trims. */
const clean = (s: string | null | undefined = ''): string =>
  (s ?? '').replace(/[\r\n\t]+/g, ' ').replace(/\s+/g, ' ').trim();

/** True when every cell of the row is empty after cleaning. */
const isBlankRow = (row: string[]): boolean => row.every(c => !clean(c));

/**
 * Removes leading/trailing characters that are neither letters, digits nor '_'.
 * Uses Unicode property escapes: the ASCII-only `\w` treated Hangul as
 * punctuation and therefore erased purely Korean values.
 */
function stripEdgePunctuation(value: string): string {
  return value.replace(/^[^\p{L}\p{N}_]+|[^\p{L}\p{N}_]+$/gu, '');
}

/** Returns the span when it is a positive integer, otherwise 1 (a cell occupies at least itself). */
function normalizeSpan(span: unknown): number {
  return typeof span === 'number' && Number.isInteger(span) && span > 0 ? span : 1;
}

/** Type guard for a usable (non-negative integer) row/column index. */
function isGridIndex(value: unknown): value is number {
  return typeof value === 'number' && Number.isInteger(value) && value >= 0;
}

/* -------------------------------------------------------------------------- */
/*                                    Main                                    */
/* -------------------------------------------------------------------------- */

/**
 * Walks every table of every image in an OCR result and returns the extracted
 * rows, one array per table that is judged relevant and yields at least one row.
 *
 * @param ocrResult OCR response; only `images[].tables[]` is read.
 * @returns Extracted rows grouped by table; empty when there are no images.
 */
export async function extractTablesFromOCR(ocrResult: any): Promise<ExtractedRow[][]> {
  const tables: ExtractedRow[][] = [];
  if (!ocrResult?.images) return tables;

  ocrResult.images.forEach((image: any, imgIdx: number) => {
    image?.tables?.forEach((table: OCRTable, tblIdx: number) => {
      if (!isRelevantTable(table)) return;
      const rows = extractTableData(table, imgIdx, tblIdx);
      if (rows.length) tables.push(rows);
    });
  });

  return tables;
}

/* -------------------------------------------------------------------------- */
/*                             Table relevance check                          */
/* -------------------------------------------------------------------------- */

/**
 * Decides whether a table should be parsed, based on the text of the cells in
 * its first three rows. Passes on the strict rule (a NO column plus an
 * identification keyword) or on any one of the relaxed heuristics.
 */
function isRelevantTable(table: OCRTable): boolean {
  const cells = table?.cells ?? [];
  const headers = cells
    .filter(c => c.rowIndex < 3)
    .map(getCellText)
    .join(' ')
    .toLowerCase();
  console.log(`🔍 Checking table relevance. Headers: "${headers}"`);

  // Strict rule.
  const hasNoColumn = /\bno\b|번호/.test(headers);
  const hasIdentification = /identification|식별|ident|id/.test(headers);
  console.log(`📝 Has NO column: ${hasNoColumn}`);
  console.log(`📝 Has Identification: ${hasIdentification}`);

  if (hasNoColumn && hasIdentification) {
    console.log(`✅ Table passes strict criteria`);
    return true;
  }

  // Relaxed rules.
  const relaxedConditions = [
    // 1: a table with many cells whose header area contains digits
    cells.length > 10 && /\d+/.test(headers),
    // 2: joint / tag / weld related keywords
    /joint|tag|weld|type|date/.test(headers),
    // 3: something that looks like an identification number (hyphenated, long text)
    headers.includes('-') && headers.length > 20,
    // 4: Korean keywords (welding / joint / tag)
    /용접|조인트|태그/.test(headers),
  ];

  const passedConditions = relaxedConditions.filter(Boolean).length;
  console.log(`📊 Relaxed conditions passed: ${passedConditions}/${relaxedConditions.length}`);

  if (passedConditions >= 1) {
    console.log(`✅ Table passes relaxed criteria`);
    return true;
  }

  console.log(`❌ Table does not meet any criteria`);
  return false;
}

/* -------------------------------------------------------------------------- */
/*                             Table interpretation                           */
/* -------------------------------------------------------------------------- */

/**
 * Builds the key used to drop duplicate rows. Rows carrying a NO or an
 * identification number keep the `${no}-${identificationNo}` key. Rows with
 * neither would all share the key "-" and be discarded after the first one, so
 * they are keyed on their remaining fields instead.
 */
function rowKey(row: ExtractedRow): string {
  if (row.no || row.identificationNo) {
    return `${row.no}-${row.identificationNo}`;
  }
  return `-|${row.tagNo}|${row.jointNo}|${row.jointType}|${row.weldingDate}`;
}

/**
 * Converts one OCR table into validated, de-duplicated rows.
 *
 * @param table  Table to parse.
 * @param imgIdx Index of the image (used for logging only).
 * @param tblIdx Index of the table inside the image; stored as `sourceTable`.
 * @returns Rows below the detected header row; empty when no header is found.
 */
function extractTableData(table: OCRTable, imgIdx: number, tblIdx: number): ExtractedRow[] {
  console.log(`🔧 Starting extractTableData for table ${imgIdx}-${tblIdx}`);

  const grid = buildGrid(table);
  console.log(`📊 Grid size: ${grid.length} rows x ${grid[0]?.length || 0} columns`);

  const headerRowIdx = findHeaderRow(grid);
  console.log(`📍 Header row index: ${headerRowIdx}`);
  if (headerRowIdx === -1) {
    console.log(`❌ No header row found`);
    return [];
  }

  const format = detectFormat(grid[headerRowIdx]);
  const mapping = mapColumns(grid[headerRowIdx]);
  console.log(`📋 Detected format: ${format}`);
  console.log(`🗂️ Column mapping:`, mapping);

  const seen = new Set<string>();
  const data: ExtractedRow[] = [];

  for (let r = headerRowIdx + 1; r < grid.length; r++) {
    const row = grid[r];
    if (isBlankRow(row)) {
      console.log(`⏭️ Row ${r}: blank, skipping`);
      continue;
    }

    console.log(`🔍 Processing row ${r}: [${row.join(' | ')}]`);
    const parsed = buildRow(row, format, mapping, tblIdx, r);
    if (!parsed) {
      console.log(`❌ Row ${r}: failed to parse`);
      continue;
    }
    if (!isValidRow(parsed)) {
      console.log(`❌ Row ${r}: invalid (no: "${parsed.no}", id: "${parsed.identificationNo}")`);
      continue;
    }

    const key = rowKey(parsed);
    if (seen.has(key)) {
      console.log(`⚠️ Row ${r}: duplicate key "${key}", skipping`);
      continue;
    }
    seen.add(key);
    data.push(parsed);
    console.log(`✅ Row ${r}: added (${JSON.stringify(parsed)})`);
  }

  console.log(`🎯 Table ${imgIdx}-${tblIdx}: extracted ${data.length} valid rows`);
  return data;
}

/* -------------------------------------------------------------------------- */
/*                               Grid & header                                */
/* -------------------------------------------------------------------------- */

/**
 * Lays the table cells out on a rectangular grid of strings. A spanning cell
 * writes its text into every position it covers; when several cells land on
 * the same position their texts are joined with a space. Missing or invalid
 * spans count as 1 and cells without a usable row/column index are skipped.
 */
function buildGrid(table: OCRTable): string[][] {
  const allCells = table?.cells ?? [];
  console.log(`🔧 Building grid from ${allCells.length} cells`);

  const cells = allCells.filter(cell => {
    const usable = isGridIndex(cell?.rowIndex) && isGridIndex(cell?.columnIndex);
    if (!usable) console.warn(`⚠️ Skipping cell with invalid position`);
    return usable;
  });

  let maxR = -1;
  let maxC = -1;
  for (const cell of cells) {
    maxR = Math.max(maxR, cell.rowIndex + normalizeSpan(cell.rowSpan) - 1);
    maxC = Math.max(maxC, cell.columnIndex + normalizeSpan(cell.columnSpan) - 1);
  }
  console.log(`📊 Grid dimensions: ${maxR + 1} rows x ${maxC + 1} columns`);

  const grid: string[][] = Array.from({ length: maxR + 1 }, () => Array<string>(maxC + 1).fill(''));

  // Per-cell detail output.
  cells.forEach((cell, idx) => {
    const txt = getCellText(cell);
    console.log(`📱 Cell ${idx}: (${cell.rowIndex},${cell.columnIndex}) span(${cell.rowSpan},${cell.columnSpan}) = "${txt}"`);

    const rowEnd = cell.rowIndex + normalizeSpan(cell.rowSpan);
    const colEnd = cell.columnIndex + normalizeSpan(cell.columnSpan);
    for (let r = cell.rowIndex; r < rowEnd; r++) {
      for (let c = cell.columnIndex; c < colEnd; c++) {
        const oldValue = grid[r][c];
        const newValue = oldValue ? `${oldValue} ${txt}` : txt;
        grid[r][c] = newValue;
        if (oldValue) {
          console.log(`🔄 Grid[${r}][${c}]: "${oldValue}" → "${newValue}"`);
        }
      }
    }
  });

  // Final grid output.
  console.log(`📋 Final grid:`);
  grid.forEach((row, r) => {
    console.log(` Row ${r}: [${row.map(cell => `"${cell}"`).join(', ')}]`);
  });

  return grid;
}

/** Joins all non-empty words of a cell with single spaces; '' when the cell has no text lines. */
function getCellText(cell: TableCell): string {
  return (
    cell.cellTextLines
      ?.flatMap(line => (line.cellWords ?? []).map(word => (word.inferText ?? '').trim()))
      .filter(Boolean)
      .join(' ') ?? ''
  );
}

/**
 * Finds the header row among the first five grid rows.
 * Order of precedence: strict keyword match, relaxed keyword heuristics, then
 * the first non-empty row among the first three.
 *
 * @returns The header row index, or -1 when nothing qualifies.
 */
function findHeaderRow(grid: string[][]): number {
  console.log(`🔍 Finding header row in grid with ${grid.length} rows`);

  for (let i = 0; i < Math.min(5, grid.length); i++) {
    const rowText = grid[i].join(' ').toLowerCase();
    console.log(`📝 Row ${i}: "${rowText}"`);

    // Strict rule: NO column plus identification keyword.
    if (/\bno\b|번호/.test(rowText) && /identification|식별|ident/.test(rowText)) {
      console.log(`✅ Row ${i}: Strict match`);
      return i;
    }

    // A missing match counts as zero hits instead of comparing undefined with a number.
    const keywordHits = rowText.match(/joint|tag|type|weld|date|no|id|식별|번호|용접/g)?.length ?? 0;

    // Relaxed rules.
    const relaxedMatches = [
      // 1. NO column + another related keyword
      /\bno\b|번호/.test(rowText) && /joint|tag|type|weld|date/.test(rowText),
      // 2. ID keyword + another related keyword
      /identification|식별|ident|id/.test(rowText) && /joint|tag|no|type/.test(rowText),
      // 3. several welding-related keywords
      keywordHits >= 3,
      // 4. first row containing at least three words longer than one character
      i === 0 && rowText.split(/\s+/).filter(w => w.length > 1).length >= 3,
    ];

    if (relaxedMatches.some(Boolean)) {
      console.log(`✅ Row ${i}: Relaxed match`);
      return i;
    }

    console.log(`❌ Row ${i}: No match`);
  }

  // Last resort: the first non-empty row.
  for (let i = 0; i < Math.min(3, grid.length); i++) {
    if (grid[i].some(cell => cell.trim().length > 0)) {
      console.log(`⚠️ Using row ${i} as fallback header`);
      return i;
    }
  }

  console.log(`❌ No header row found`);
  return -1;
}

/* -------------------------------------------------------------------------- */
/*                               Column mapping                               */
/* -------------------------------------------------------------------------- */

/** 'format2' when the header mentions both "tag" and "joint" (separate columns), otherwise 'format1'. */
function detectFormat(header: string[]): TableFormat {
  const h = header.join(' ').toLowerCase();
  return h.includes('tag') && h.includes('joint') ? 'format2' : 'format1';
}

/**
 * Maps logical fields to column indices using the header row.
 * Runs exact header matches first, then keyword inference, then pattern-based
 * inference, and finally positional defaults for anything still unmapped.
 */
function mapColumns(header: string[]): ColumnMapping {
  const mp: ColumnMapping = {
    no: -1,
    identification: -1,
    tagNo: -1,
    jointNo: -1,
    jointType: -1,
    weldingDate: -1,
  };

  console.log(`🗂️ Smart mapping columns from header: [${header.map(h => `"${h}"`).join(', ')}]`);

  // === STEP 1: per-column header matching ===
  header.forEach((h, i) => {
    const t = h.toLowerCase().trim();
    console.log(`📋 Column ${i}: "${h}" → "${t}"`);

    if (mp.no === -1 && (/^no\.?$/i.test(t) || /^번호$/i.test(t) || /^순번$/i.test(t))) {
      mp.no = i;
      console.log(`✅ NO column (individual) mapped to index ${i}`);
    }
    if (mp.identification === -1 && (/identification.*no/i.test(t) || /식별.*번호/i.test(t))) {
      mp.identification = i;
      console.log(`✅ Identification column (individual) mapped to index ${i}`);
    }
    if (mp.tagNo === -1 && (/tag.*no/i.test(t) || /태그.*번호/i.test(t))) {
      mp.tagNo = i;
      console.log(`✅ Tag No column (individual) mapped to index ${i}`);
    }
    // "oint" also matches headers whose leading "J" is missing.
    if (mp.jointNo === -1 && (/joint.*no/i.test(t) || /조인트.*번호/i.test(t) || /oint.*no/i.test(t))) {
      mp.jointNo = i;
      console.log(`✅ Joint No column (individual) mapped to index ${i}`);
    }
    if (mp.jointType === -1 && (/joint.*type/i.test(t) || /^type$/i.test(t) || /형태/i.test(t))) {
      mp.jointType = i;
      console.log(`✅ Joint Type column (individual) mapped to index ${i}`);
    }
    if (
      mp.weldingDate === -1 &&
      (/welding.*date/i.test(t) || /weld.*date/i.test(t) || /^date$/i.test(t) || /날짜/i.test(t))
    ) {
      mp.weldingDate = i;
      console.log(`✅ Welding Date column (individual) mapped to index ${i}`);
    }
  });

  // === STEP 2: practical inference ===
  console.log(`🤖 Starting practical column inference...`);

  // Without an explicit NO column, assume the first column.
  if (mp.no === -1) {
    mp.no = 0;
    console.log(`🔮 NO column inferred as index 0 (first column)`);
  }

  // Identification: first column whose header contains the keyword.
  if (mp.identification === -1) {
    for (let i = 0; i < header.length; i++) {
      const text = header[i].toLowerCase();
      if (text.includes('identification') || text.includes('식별')) {
        mp.identification = i;
        console.log(`🆔 Identification column found at index ${i}`);
        break;
      }
    }
  }

  // Tag No: first column mentioning "tag" without "no".
  if (mp.tagNo === -1) {
    for (let i = 0; i < header.length; i++) {
      const text = header[i].toLowerCase();
      if (text.includes('tag') && !text.includes('no')) {
        mp.tagNo = i;
        console.log(`🏷️ Tag column found at index ${i}`);
        break;
      }
    }
  }

  // Joint No: first column mentioning "joint" (or "oint").
  if (mp.jointNo === -1) {
    for (let i = 0; i < header.length; i++) {
      const text = header[i].toLowerCase();
      if (text.includes('joint') || text.includes('oint')) {
        mp.jointNo = i;
        console.log(`🔗 Joint column found at index ${i}`);
        break;
      }
    }
  }

  // === STEP 3: pattern-based inference (last resort) ===
  console.log(`🎯 Pattern-based fallback mapping...`);

  // Identification: a header cell holding a long hyphenated string.
  if (mp.identification === -1) {
    for (let i = 0; i < header.length; i++) {
      const text = header[i];
      if (text.includes('-') && text.length > 15) {
        mp.identification = i;
        console.log(`🆔 Identification inferred at index ${i} (contains ID pattern)`);
        break;
      }
    }
  }

  // Tag No: a header cell (first column excluded) holding a 7-8 digit run.
  if (mp.tagNo === -1) {
    for (let i = 1; i < header.length; i++) {
      const text = header[i];
      if (/\d{7,8}/.test(text)) {
        mp.tagNo = i;
        console.log(`🏷️ Tag No inferred at index ${i} (contains number pattern)`);
        break;
      }
    }
  }

  // === STEP 4: positional defaults ===
  console.log(`🔧 Setting default values for unmapped columns...`);

  const positionalDefaults: Array<{ key: keyof ColumnMapping; defaultIndex: number }> = [
    { key: 'identification', defaultIndex: 1 },
    { key: 'tagNo', defaultIndex: 2 },
    { key: 'jointNo', defaultIndex: 3 },
    { key: 'jointType', defaultIndex: 4 },
    { key: 'weldingDate', defaultIndex: Math.min(5, header.length - 1) },
  ];

  for (const { key, defaultIndex } of positionalDefaults) {
    if (mp[key] === -1 && defaultIndex < header.length) {
      mp[key] = defaultIndex;
      console.log(`🔧 ${key} set to default index ${defaultIndex}`);
    }
  }

  console.log(`🎯 Final optimized column mapping:`, mp);

  // === STEP 5: mapping quality check ===
  const mappedCount = Object.values(mp).filter(v => v !== -1).length;
  const totalColumns = Object.keys(mp).length;
  const mappingQuality = mappedCount / totalColumns;

  console.log(`📊 Mapping quality: ${mappedCount}/${totalColumns} (${(mappingQuality * 100).toFixed(1)}%)`);

  if (mappingQuality < 0.5) {
    console.warn(`⚠️ Low mapping quality detected. Consider manual adjustment.`);
  }

  return mp;
}

/* -------------------------------------------------------------------------- */
/*                               Row extraction                               */
/* -------------------------------------------------------------------------- */

/**
 * Builds an ExtractedRow from one grid row.
 * Reads the mapped columns first, then fills any still-empty field by pattern
 * matching over the whole row, and finally trims edge punctuation and scores
 * the row.
 *
 * @returns The row, or null when none of no / identificationNo / tagNo /
 *          jointNo could be extracted.
 */
function buildRow(
  row: string[],
  format: 'format1' | 'format2',
  mp: ColumnMapping,
  tblIdx: number,
  rowIdx: number
): ExtractedRow | null {
  console.log(`🔨 Building row from: [${row.map(r => `"${r}"`).join(', ')}]`);
  console.log(`📋 Using mapping:`, mp);
  console.log(`📄 Format: ${format}`);

  const out: ExtractedRow = {
    no: '',
    identificationNo: '',
    tagNo: '',
    jointNo: '',
    jointType: '',
    weldingDate: '',
    confidence: 0,
    sourceTable: tblIdx,
    sourceRow: rowIdx,
  };

  const inRow = (idx: number): boolean => idx >= 0 && idx < row.length;

  // === STEP 1: mapped columns ===
  if (inRow(mp.no)) {
    const rawNo = clean(row[mp.no]);
    // Prefer the first standalone 2-4 digit number; otherwise keep the raw text.
    const noMatch = rawNo.match(/\b(\d{2,4})\b/);
    out.no = noMatch ? noMatch[1] : rawNo;
    console.log(`📍 NO from column ${mp.no}: "${out.no}" (raw: "${rawNo}")`);
  }

  if (inRow(mp.jointType)) {
    out.jointType = clean(row[mp.jointType]);
    console.log(`🔗 Joint Type from column ${mp.jointType}: "${out.jointType}"`);
  }

  if (inRow(mp.weldingDate)) {
    out.weldingDate = clean(row[mp.weldingDate]);
    console.log(`📅 Welding Date from column ${mp.weldingDate}: "${out.weldingDate}"`);
  }

  // === STEP 2: format-specific extraction ===
  if (format === 'format2') {
    console.log(`📄 Processing Format 2 (separate columns)`);

    if (inRow(mp.identification)) {
      out.identificationNo = clean(row[mp.identification]);
      console.log(`🆔 Identification from column ${mp.identification}: "${out.identificationNo}"`);
    }
    if (inRow(mp.jointNo)) {
      out.jointNo = clean(row[mp.jointNo]);
      console.log(`🔗 Joint No from column ${mp.jointNo}: "${out.jointNo}"`);
    }
    if (inRow(mp.tagNo)) {
      out.tagNo = clean(row[mp.tagNo]);
      console.log(`🏷️ Tag No from column ${mp.tagNo}: "${out.tagNo}"`);
    }
  } else {
    console.log(`📄 Processing Format 1 (combined identification column)`);

    let combinedText = '';
    if (inRow(mp.identification)) {
      combinedText = row[mp.identification];
      console.log(`🆔 Combined text from column ${mp.identification}: "${combinedText}"`);
    }

    const parsed = parseIdentificationData(combinedText);
    out.identificationNo = parsed.identificationNo;
    out.jointNo = parsed.jointNo;
    out.tagNo = parsed.tagNo;
    console.log(`📊 Parsed from identification column:`, parsed);
  }

  // === STEP 3: pattern matching for fields that are still empty ===
  console.log(`🔍 Aggressive pattern matching for missing fields...`);

  const allText = row.join(' ');
  console.log(`📝 Full row text: "${allText}"`);

  // NO: look for a number in the first column.
  if (!out.no && row.length > 0) {
    const firstCol = clean(row[0]);
    for (const pattern of ROW_NO_PATTERNS) {
      const matches = firstCol.match(pattern);
      if (matches && matches.length > 0) {
        out.no = matches[0].replace(/\D/g, ''); // digits only
        console.log(`📍 NO found via pattern in first column: "${out.no}"`);
        break;
      }
    }
  }

  // Identification No: long hyphenated string.
  if (!out.identificationNo) {
    for (const pattern of ROW_ID_PATTERNS) {
      const matches = allText.match(pattern);
      if (matches && matches.length > 0) {
        out.identificationNo = matches[0];
        console.log(`🆔 Identification found via pattern: "${out.identificationNo}"`);
        break;
      }
    }
  }

  // Tag No: standalone 7-8 digit number.
  if (!out.tagNo) {
    const tagMatches = allText.match(/\b\d{7,8}\b/g);
    if (tagMatches && tagMatches.length > 0) {
      out.tagNo = tagMatches[0];
      console.log(`🏷️ Tag found via pattern: "${out.tagNo}"`);
    }
  }

  // Joint No: short alphanumeric token that is not already used elsewhere.
  if (!out.jointNo) {
    for (const pattern of ROW_JOINT_PATTERNS) {
      const matches = allText.match(pattern);
      if (!matches) continue;

      const candidates = matches.filter(
        m =>
          m !== out.no &&
          m !== out.tagNo &&
          m !== out.identificationNo &&
          m.length >= 2 &&
          m.length <= 6 &&
          !/^(no|tag|joint|type|date|welding|project|samsung|class)$/i.test(m)
      );

      if (candidates.length > 0) {
        out.jointNo = candidates[0];
        console.log(`🔗 Joint found via pattern: "${out.jointNo}"`);
        break;
      }
    }
  }

  // Welding date: yyyy.mm.dd style string.
  if (!out.weldingDate) {
    for (const pattern of ROW_DATE_PATTERNS) {
      const matches = allText.match(pattern);
      if (matches && matches.length > 0) {
        out.weldingDate = matches[0];
        console.log(`📅 Date found via pattern: "${out.weldingDate}"`);
        break;
      }
    }
  }

  // === STEP 4: post-processing and scoring ===
  for (const field of TEXT_FIELDS) {
    const value = out[field];
    if (value) {
      out[field] = stripEdgePunctuation(value).trim();
    }
  }

  out.confidence = scoreRow(out);

  console.log(`📊 Final extracted row:`, out);
  console.log(`🎯 Row confidence: ${out.confidence}`);

  // Require at least one of the key fields.
  const hasAnyData = !!(out.no || out.identificationNo || out.tagNo || out.jointNo);
  if (!hasAnyData) {
    console.log(`⚠️ No meaningful data extracted from row`);
    return null;
  }

  return out;
}

/* -------------------------------------------------------------------------- */
/*                           Format-1 cell parsing                            */
/* -------------------------------------------------------------------------- */

/**
 * Splits a combined Format-1 identification cell into its parts:
 * - identificationNo: the longest match of the first hyphenated pattern that matches
 * - tagNo: a 7-8 digit number, preferring one introduced by "tag"
 * - jointNo: the shortest remaining 2-8 character alphanumeric token, falling
 *   back to the shortest remaining token of any kind
 *
 * @returns All three fields; each is '' when not found.
 */
function parseIdentificationData(txt: string): { identificationNo: string; jointNo: string; tagNo: string } {
  console.log(`🔍 Parsing identification data from: "${txt}"`);

  const cleaned = clean(txt);
  if (!cleaned) {
    console.log(`❌ Empty input text`);
    return { identificationNo: '', jointNo: '', tagNo: '' };
  }

  console.log(`🧹 Cleaned text: "${cleaned}"`);

  const result = { identificationNo: '', jointNo: '', tagNo: '' };

  // 1. Identification No (two or more hyphens).
  for (const pattern of CELL_ID_PATTERNS) {
    const matches = cleaned.match(pattern);
    if (matches && matches.length > 0) {
      // Keep the longest match; the earliest one wins ties.
      result.identificationNo = matches.reduce((a, b) => (a.length >= b.length ? a : b));
      console.log(`🆔 Found identification: "${result.identificationNo}"`);
      break;
    }
  }

  // 2. Tag No (7-8 digits): "tag: 1234567" style first, then any bare number.
  const labelledTag = cleaned.match(/\btag[:\s]*(\d{7,8})\b/i);
  if (labelledTag) {
    result.tagNo = labelledTag[1] || labelledTag[0];
  }
  if (!result.tagNo) {
    const bareTags = cleaned.match(/\b(\d{7,8})\b/g) ?? [];
    const firstTag = bareTags.find(m => m.length >= 7 && m.length <= 8);
    if (firstTag) result.tagNo = firstTag;
  }
  if (result.tagNo) {
    console.log(`🏷️ Found tag: "${result.tagNo}"`);
  }

  // 3. Joint No (from the tokens that are left).
  const tokens = cleaned.split(/\s+/).map(token => clean(token)).filter(Boolean);
  console.log(`📝 All tokens: [${tokens.join(', ')}]`);

  // Drop tokens that are (part of) the identification or tag, single characters and header words.
  const remainingTokens = tokens.filter(
    token =>
      !result.identificationNo.includes(token) &&
      !result.tagNo.includes(token) &&
      token.length > 1 &&
      !/^(tag|joint|no|identification|식별|번호)$/i.test(token)
  );

  console.log(`🔄 Remaining tokens for joint: [${remainingTokens.join(', ')}]`);

  if (remainingTokens.length > 0) {
    const jointCandidates = remainingTokens
      .filter(token => /^[A-Za-z0-9]+$/.test(token) && token.length >= 2 && token.length <= 8)
      .sort((a, b) => a.length - b.length);

    if (jointCandidates.length > 0) {
      result.jointNo = jointCandidates[0];
      console.log(`🔗 Found joint: "${result.jointNo}"`);
    } else {
      // No alphanumeric candidate: use the shortest remaining token.
      result.jointNo = remainingTokens.reduce((a, b) => (a.length <= b.length ? a : b));
      console.log(`🔗 Found joint (fallback): "${result.jointNo}"`);
    }
  }

  // 4. Trim leading/trailing punctuation.
  result.identificationNo = stripEdgePunctuation(result.identificationNo);
  result.jointNo = stripEdgePunctuation(result.jointNo);
  result.tagNo = stripEdgePunctuation(result.tagNo);

  console.log(`📊 Final parsed result:`, result);
  return result;
}

/* -------------------------------------------------------------------------- */
/*                             Validation & scoring                           */
/* -------------------------------------------------------------------------- */

/**
 * Accepts a row when it has a NO or an identification number (level 1), at
 * least two of the four key fields (level 2), or any text field longer than
 * one character (level 3).
 */
function isValidRow(r: ExtractedRow): boolean {
  console.log(`✅ Validating row: no="${r.no}", id="${r.identificationNo}", tag="${r.tagNo}", joint="${r.jointNo}"`);

  // Level 1: strict rule.
  if (r.no?.trim() || r.identificationNo?.trim()) {
    console.log(`✅ Level 1 validation passed (has no or identification)`);
    return true;
  }

  // Level 2: at least two key fields.
  const mainFields = [r.no?.trim(), r.identificationNo?.trim(), r.tagNo?.trim(), r.jointNo?.trim()].filter(Boolean);
  if (mainFields.length >= 2) {
    console.log(`✅ Level 2 validation passed (${mainFields.length} main fields present)`);
    return true;
  }

  // Level 3: any field with more than one character.
  const allFields = TEXT_FIELDS.map(field => r[field]?.trim()).filter(field => field && field.length > 1);
  if (allFields.length >= 1) {
    console.log(`✅ Level 3 validation passed (${allFields.length} fields with meaningful content)`);
    return true;
  }

  console.log(`❌ Validation failed - no meaningful content found`);
  return false;
}

/** Sum of the weights of the populated text fields divided by the total weight (0..1). */
function scoreRow(r: ExtractedRow): number {
  let score = 0;
  let total = 0;
  for (const field of TEXT_FIELDS) {
    total += FIELD_WEIGHTS[field];
    if (r[field]?.length) score += FIELD_WEIGHTS[field];
  }
  return total ? score / total : 0;
}

/* -------------------------------------------------------------------------- */
/*                            OCR quality analysis                            */
/* -------------------------------------------------------------------------- */

/**
 * Summarises an OCR result: mean field confidence, number of tables, number of
 * keyword hits in the field texts and keyword hits per field.
 * A null/undefined result or one without images yields all zeros.
 */
export function analyzeOCRQuality(ocrResult: any) {
  let conf = 0;
  let cnt = 0;
  let tbl = 0;
  let kw = 0;
  const keys = ['no.', 'identification', 'joint', 'tag', 'type', 'weld', 'date'];

  ocrResult?.images?.forEach((img: any) => {
    tbl += img?.tables?.length || 0;
    img?.fields?.forEach((f: any) => {
      conf += f?.inferConfidence || 0;
      cnt++;
      const t = String(f?.inferText || '').toLowerCase();
      keys.forEach(k => {
        if (t.includes(k)) kw++;
      });
    });
  });

  return {
    confidence: cnt ? conf / cnt : 0,
    tablesFound: tbl,
    textQuality: cnt ? kw / cnt : 0,
    keywordCount: kw,
  };
}