Skip to content

Commit

Permalink
[D-0] Vision을 통해 영수증 스캔 기능 구현 (#91)
Browse files Browse the repository at this point in the history
* feat: DocumentScanner scanDocument() 구현

* feat: `editImageWithScanResult()` 메서드 구현

* feat: 문서 스캔 기능 구현 및 maskLayer 추가

* fix: CreateOCRLedgerVC 메모리 누수 해결

* fix: 영수증 촬영 시 captureImageView에 원본 이미지가 표현되도록 수정

* chore: CVImageBuffer extension DocumentScanner로 이동

* refactor: transformBoundingBox 네이밍 및 코드 변경

- transformBoundingBox -> transformVisionToIOS 네이밍 변경
- CGAffineTransform -> CGRect로 변경

* refactor: editImageWithScanResult 반환 타입 변경

- CGImage -> CIImage

* refactor: scanDocument 파라미터 및 request 설정값 변경
  • Loading branch information
Siwon-L authored Jan 16, 2025
1 parent 297ca14 commit dc243a3
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,21 @@ import Core

import RxSwift

/// Receives the images produced when a `CameraView` finishes a capture.
protocol CameraViewDelegate: AnyObject {
  /// Reports the outcome of a capture.
  ///
  /// - Parameters:
  ///   - cameraView: The view that performed the capture.
  ///   - result: The image derived from the scan result.
  ///   - image: The photo as it was captured.
  func cameraView(
    _ cameraView: CameraView,
    scanResult result: UIImage,
    originalImage image: UIImage
  )
}

final class CameraView: UIView {
weak var delegate: AVCapturePhotoCaptureDelegate?
weak var delegate: CameraViewDelegate?

/// Counter of failed scans.
///
/// When the counter rises above 10 while the mask is still visible, the mask
/// is hidden and the counter is reset to zero.
private var scanFailedCount = 0 {
  didSet {
    let exceededLimit = scanFailedCount > 10
    guard exceededLimit, !maskLayer.isHidden else { return }

    maskLayer.isHidden = true
    // Assigning inside `didSet` does not re-trigger the observer.
    scanFailedCount = 0
  }
}

private let captureSession: AVCaptureSession = {
let session = AVCaptureSession()
Expand All @@ -15,6 +28,10 @@ final class CameraView: UIView {
}()

// Photo output used for still captures.
private let stillImageOutput = AVCapturePhotoOutput()
// Video data output whose frames are delivered to the sample-buffer delegate.
private let videoDataOutput = AVCaptureVideoDataOutput()
// Scanner used to detect a document in frames and to edit captured photos.
private let documentScanner = DocumentScanner()

// Layer whose frame and border mark the most recently detected rectangle.
private var maskLayer = CAShapeLayer()

private let videoPreviewLayer: AVCaptureVideoPreviewLayer = {
let layer = AVCaptureVideoPreviewLayer()
Expand Down Expand Up @@ -70,6 +87,18 @@ final class CameraView: UIView {
captureSession.addOutput(stillImageOutput)
}

if captureSession.canAddOutput(videoDataOutput) {
self.videoDataOutput.setSampleBufferDelegate(self, queue: .global())
captureSession.addOutput(videoDataOutput)

guard let connection = self.videoDataOutput.connection(with: AVMediaType.video),
connection.isVideoOrientationSupported else { return }

connection.videoOrientation = .portrait
}

self.layer.addSublayer(maskLayer)

// 프리뷰 레이어 설정
videoPreviewLayer.session = captureSession

Expand Down Expand Up @@ -100,11 +129,44 @@ final class CameraView: UIView {

/// Binder that captures one still photo per received event.
///
/// The view itself is passed as the `AVCapturePhotoCaptureDelegate`, so a capture
/// does not require `delegate` to be set and never traps when it is missing.
/// Exactly one capture is requested per event: an `AVCapturePhotoSettings`
/// instance must not be reused for a second `capturePhoto` call.
var takePhoto: Binder<Void> {
  Binder(self) { owner, _ in
    // A fresh settings object is required for every capture request.
    let settings = AVCapturePhotoSettings()
    owner.stillImageOutput.capturePhoto(with: settings, delegate: owner)
  }
}
}

// MARK: - AVCaptureVideoDataOutputSampleBufferDelegate

extension CameraView: AVCaptureVideoDataOutputSampleBufferDelegate {
  /// Runs document detection on a delivered video frame and updates the mask.
  ///
  /// View state (`bounds`, `maskLayer`, `scanFailedCount`) is only touched on the
  /// main actor; the detection itself runs inside `documentScanner`.
  ///
  /// - Parameters:
  ///   - output: The output that produced the frame.
  ///   - sampleBuffer: The frame; frames without an image buffer are ignored.
  ///   - connection: The connection the frame arrived on.
  func captureOutput(
    _ output: AVCaptureOutput,
    didOutput sampleBuffer: CMSampleBuffer,
    from connection: AVCaptureConnection
  ) {
    guard let buffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }

    // This callback is not guaranteed to run on the main thread, so hop to the
    // main actor before reading or mutating any view/layer state.
    Task { @MainActor [weak self] in
      guard let self else { return }

      do {
        guard let scanRect = try await self.documentScanner.scanDocument(
          imageBuffer: buffer,
          with: self.bounds
        ) else {
          self.scanFailedCount += 1
          return
        }
        self.scanFailedCount = 0
        self.updateMaskLayer(in: scanRect)
      } catch {
        // A failed detection request is treated like a frame without a document
        // instead of being silently dropped by the task.
        self.scanFailedCount += 1
      }
    }
  }

  /// Shows the mask and moves it to `rect`.
  ///
  /// - Parameter rect: Frame to assign to the mask layer.
  private func updateMaskLayer(in rect: CGRect) {
    maskLayer.isHidden = false
    maskLayer.frame = rect
    maskLayer.cornerRadius = 10
    maskLayer.borderColor = UIColor.systemBlue.cgColor
    maskLayer.borderWidth = 1
    maskLayer.opacity = 1
  }
}

// MARK: - AVCapturePhotoCaptureDelegate

extension CameraView: AVCapturePhotoCaptureDelegate {
  /// Converts a finished capture into images and forwards them to `delegate`.
  ///
  /// Nothing is forwarded when the capture failed, the photo has no data, the
  /// data cannot be decoded, or the scanner returns no edited image.
  ///
  /// - Parameters:
  ///   - output: The photo output that finished processing.
  ///   - photo: The captured photo.
  ///   - error: Non-nil when the capture failed.
  func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
    // A failed capture has no usable photo; bail out before doing any work.
    guard error == nil, let imageData = photo.fileDataRepresentation() else { return }

    // The delegate receives `UIImage`s and is handed the view itself, so deliver
    // the result on the main actor.
    Task { @MainActor [weak self] in
      guard let self,
            let originalImage = UIImage(data: imageData),
            let result = await self.documentScanner.editImageWithScanResult(imageData) else { return }

      self.delegate?.cameraView(self, scanResult: UIImage(ciImage: result), originalImage: originalImage)
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import Vision
import CoreImage

/// Detects a rectangular document in camera frames and perspective-corrects
/// captured photos using the most recently detected rectangle.
actor DocumentScanner: Sendable {
  /// Rectangle found by the most recent successful `scanDocument` call.
  private var recentScanResult: VNRectangleObservation?

  /// Looks for a rectangle in `imageBuffer` and maps it into preview coordinates.
  ///
  /// The Vision request is performed synchronously and its results are read once
  /// `perform` returns, so there is a single exit path per outcome (no continuation
  /// that could be resumed both from the completion handler and from the error path).
  ///
  /// - Parameters:
  ///   - imageBuffer: The frame to analyse.
  ///   - previewSize: Rectangle whose width and height the normalized result is scaled to.
  /// - Returns: The detected rectangle in preview coordinates, or `nil` when none was found.
  /// - Throws: Any error thrown by `VNImageRequestHandler.perform(_:)`.
  func scanDocument(imageBuffer: CVImageBuffer, with previewSize: CGRect) async throws -> CGRect? {
    let request = VNDetectRectanglesRequest()
    request.minimumAspectRatio = 0.2
    request.maximumAspectRatio = 1.0
    request.minimumConfidence = 0.8

    let handler = VNImageRequestHandler(cvPixelBuffer: imageBuffer, options: [:])
    try handler.perform([request])

    guard let rectangleObservation = request.results?.first else { return nil }

    updateRecentScanResult(rectangleObservation)
    return transformVisionToIOS(rectangleObservation, to: previewSize)
  }

  /// Perspective-corrects `imageData` with the most recently detected rectangle.
  ///
  /// - Parameter imageData: Encoded image data of the captured photo.
  /// - Returns: The corrected image, or `nil` when the data cannot be decoded or no
  ///   rectangle has been detected yet.
  func editImageWithScanResult(_ imageData: Data) -> CIImage? {
    // NOTE(review): `.right` presumably compensates for the sensor orientation of
    // the captured photo — confirm against the capture configuration.
    guard let ciImage = CIImage(data: imageData)?.oriented(.right),
          let recentScanResult else { return nil }

    // The observation's corners are normalized; scale them to the image size.
    let imageSize = ciImage.extent.size
    let topLeft = recentScanResult.topLeft.scaled(to: imageSize)
    let topRight = recentScanResult.topRight.scaled(to: imageSize)
    let bottomLeft = recentScanResult.bottomLeft.scaled(to: imageSize)
    let bottomRight = recentScanResult.bottomRight.scaled(to: imageSize)

    return ciImage.applyingFilter("CIPerspectiveCorrection", parameters: [
      "inputTopLeft": CIVector(cgPoint: topLeft),
      "inputTopRight": CIVector(cgPoint: topRight),
      "inputBottomLeft": CIVector(cgPoint: bottomLeft),
      "inputBottomRight": CIVector(cgPoint: bottomRight),
    ])
  }

  /// Converts a normalized Vision bounding box into a rectangle sized for `previewSize`.
  ///
  /// The y axis is flipped (`1 - maxY`) so the result is measured from the top edge.
  private func transformVisionToIOS(_ rectangleObservation: VNRectangleObservation, to previewSize: CGRect) -> CGRect {
    let visionRect = rectangleObservation.boundingBox
    return CGRect(
      origin: CGPoint(
        x: CGFloat(visionRect.minX * previewSize.width),
        y: CGFloat((1 - visionRect.maxY) * previewSize.height)
      ),
      size: CGSize(
        width: visionRect.width * previewSize.width,
        height: visionRect.height * previewSize.height
      )
    )
  }

  /// Remembers `rectangleObservation` for later use by `editImageWithScanResult(_:)`.
  private func updateRecentScanResult(_ rectangleObservation: VNRectangleObservation) {
    recentScanResult = rectangleObservation
  }
}

private extension CGPoint {
  /// Multiplies each coordinate by the matching dimension of `size`.
  ///
  /// - Parameter size: Width scales `x`, height scales `y`.
  /// - Returns: The scaled point.
  func scaled(to size: CGSize) -> CGPoint {
    let scaledX = x * size.width
    let scaledY = y * size.height
    return CGPoint(x: scaledX, y: scaledY)
  }
}

// `@retroactive` is only recognized by Swift 6 toolchains; older compilers reject it
// with "unknown attribute 'retroactive'". Gate the attribute on the compiler version so
// the conformance builds on both.
// NOTE(review): `@unchecked` is a promise that buffers are safe to hand across
// concurrency domains — confirm that no buffer is mutated after being passed on.
#if compiler(>=6.0)
extension CVImageBuffer: @unchecked @retroactive Sendable {}
#else
extension CVImageBuffer: @unchecked Sendable {}
#endif

Check failure on line 73 in Projects/Feature/Ledger/Sources/Scene/Ledger/Creaters/Scan/Model/DocumentScanner.swift

View workflow job for this annotation

GitHub Actions / upload_testflight

unknown attribute 'retroactive'

Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ final class CreateOCRLedgerReactor: Reactor {
}

/// State changes emitted by `mutate(action:)` and applied by `reduce(state:mutation:)`.
enum Mutation {
/// Stores (or clears, with `nil`) the image data.
case setImageData(Data?)
/// Sets the `isTook` flag.
case setTake(Bool)
/// Sets the `isLoading` flag.
case setLoading(Bool)
/// Stores an error.
case setError(MoneyMongError)
/// Stores a destination.
case setDestination(State.Destination)
}

struct State {
let agencyId: Int
@Pulse var imageData: Data?
@Pulse var isTook: Bool = false
@Pulse var isLoading: Bool = false
@Pulse var error: MoneyMongError?
@Pulse var destination: Destination?
Expand All @@ -44,32 +44,32 @@ final class CreateOCRLedgerReactor: Reactor {

/// Maps an action to the sequence of mutations it produces.
func mutate(action: Action) -> Observable<Mutation> {
switch action {
case .onAppear:
.just(.setImageData(nil))
case .receiptShoot(let data):
// Emitted in order: store the data, mark as taken, show loading, run the
// OCR request, then hide loading.
.concat([
.just(.setImageData(data)),
.just(.setTake(true)),
.just(.setLoading(true)),
requsetOCR(data),
.just(.setLoading(false))
])
case let .onError(error):
.just(.setError(error))
// NOTE(review): `.onAppear` is already matched by the first arm above, so this
// arm can never run — confirm which of the two is intended.
case .onAppear:
.just(.setTake(false))
}
}

/// Applies a mutation to a copy of `state` and returns the copy.
func reduce(state: State, mutation: Mutation) -> State {
var newState = state
// The error is cleared on every mutation; only `.setError` sets it again.
newState.error = nil
switch mutation {
case let .setImageData(data):
newState.imageData = data
case let .setLoading(isLoading):
newState.isLoading = isLoading
case let .setError(error):
newState.error = error
case let .setDestination(destination):
newState.destination = destination
case let .setTake(isTook):
newState.isTook = isTook
}
return newState
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,16 +184,10 @@ final class CreateOCRLedgerVC: UIViewController, View {
.bind(to: reactor.action)
.disposed(by: disposeBag)

reactor.pulse(\.$imageData)
.map { $0 != nil ? UIImage(data: $0!) : nil }
.bind(to: captureImageView.rx.image)
.disposed(by: disposeBag)

reactor.pulse(\.$imageData)
.map { $0 == nil }
reactor.pulse(\.$isTook)
.bind(with: self) { owner, value in
owner.captureImageView.isHidden = value
owner.guideLabel.isHidden = !value
owner.captureImageView.isHidden = !value
owner.guideLabel.isHidden = value
}
.disposed(by: disposeBag)

Expand All @@ -217,8 +211,8 @@ final class CreateOCRLedgerVC: UIViewController, View {
.alert(
title: error.errorTitle,
subTitle: error.errorDescription,
type: .onlyOkButton({ [weak self] in
self?.captureImageView.image = nil
type: .onlyOkButton({
owner.captureImageView.image = nil
})
)
)
Expand All @@ -238,9 +232,11 @@ final class CreateOCRLedgerVC: UIViewController, View {
}
}

extension CreateOCRLedgerVC: AVCapturePhotoCaptureDelegate {
func photoOutput(_ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) {
let imageData = photo.fileDataRepresentation()
// MARK: - CameraViewDelegate

extension CreateOCRLedgerVC: CameraViewDelegate {
  /// Shows the original photo and sends the scan result to the reactor as JPEG data.
  ///
  /// - Parameters:
  ///   - cameraView: The reporting camera view.
  ///   - result: Image to encode and send with `.receiptShoot`.
  ///   - image: Image displayed in `captureImageView`.
  func cameraView(_ cameraView: CameraView, scanResult result: UIImage, originalImage image: UIImage) {
    captureImageView.image = image

    // Nothing is sent when the scan result cannot be encoded as JPEG.
    if let jpegData = result.jpegData(compressionQuality: 1.0) {
      reactor?.action.onNext(.receiptShoot(jpegData))
    }
  }
}

0 comments on commit dc243a3

Please sign in to comment.