// BKSOCRBoss.h
// indent using spaces, tab=2, spaces=2
//
// Created by David Phillip Oster on 4/19/20.
// Copyright © 2020 David Phillip Oster. Apache License.
#import <Cocoa/Cocoa.h>
NS_ASSUME_NONNULL_BEGIN
@class BKSTextPiece;
@interface BKSOCRBoss : NSObject

/// Recognizes the text in an image file and returns it as an array of BKSTextPiece objects.
///
/// The work is done synchronously on the calling thread, so call this from a
/// background thread rather than the main thread.
///
/// Requires macOS 10.15.
///
/// @param url File URL of the image to OCR.
/// @param error On failure, receives an NSError describing what went wrong. May be NULL.
/// @return The recognized text pieces, or nil on failure.
- (nullable NSArray<BKSTextPiece *> *)recognizeImageURL:(NSURL *)url
                                                  error:(NSError *__autoreleasing _Nullable *)error
    API_AVAILABLE(macos(10.15));

@end
NS_ASSUME_NONNULL_END
// BKSOCRBoss.m
// indent using spaces, tab=2, spaces=2
//
// Created by David Phillip Oster on 4/19/20.
// Copyright © 2020 David Phillip Oster. Apache License.
#import "BKSOCRBoss.h"
#import "BKSTextPiece.h"
#import <Vision/Vision.h>
NS_ASSUME_NONNULL_BEGIN
/// Error codes reported in the kBKSAppDomain error domain.
typedef NS_ENUM(NSInteger, BKSErrorCode) {
  /// Text recognition could not be set up or did not produce a usable result.
  kBKSErrorOCR = 100,
};

/// Error domain for the NSErrors created in this file.
static NSString *const kBKSAppDomain = @"BKSAppDomain";
@implementation BKSOCRBoss

/// Builds an NSError in kBKSAppDomain with code kBKSErrorOCR and the given description.
static NSError *BKSOCRError(NSString *desc) {
  return [NSError errorWithDomain:kBKSAppDomain
                             code:kBKSErrorOCR
                         userInfo:@{NSLocalizedDescriptionKey : desc}];
}

/// Runs a VNRecognizeTextRequest on the image at `url` and waits for its result.
///
/// @param url File URL of the image to OCR.
/// @param errorp On failure, receives the error. May be NULL. Only written on failure.
/// @return The recognized pieces, or nil on failure.
- (nullable NSArray<BKSTextPiece *> *)recognizeImageURL:(NSURL *)url error:(NSError **)errorp {
  // Results are carried out of the completion handler in strong __block locals.
  // Writing through the caller's autoreleasing out-parameter from inside the block
  // is unsafe: if the block runs inside an autorelease pool that drains before we
  // return, the caller would be left with a dangling NSError.
  __block NSArray<BKSTextPiece *> *pieces = nil;
  __block NSError *recognitionError = nil;

  // The request is local and performed synchronously below, so capturing self
  // strongly creates no retain cycle.
  VNRecognizeTextRequest *textRequest =
      [[VNRecognizeTextRequest alloc] initWithCompletionHandler:^(VNRequest *request, NSError *_Nullable error) {
        [self handleTextRequst:request
                         error:error
                  continuation:^(NSArray *_Nullable idx, NSError *_Nullable continuationError) {
                    pieces = idx;
                    recognitionError = continuationError;
                  }];
      }];

  VNImageRequestHandler *handler = nil;
  if (textRequest) {
    handler = [[VNImageRequestHandler alloc] initWithURL:url options:@{}];
  }
  if (nil == handler) {
    if (errorp) {
      *errorp = BKSOCRError(@"Couldn't allocate handler");
    }
    return nil;
  }

  // Check the return value rather than the error pointer to detect failure.
  NSError *performError = nil;
  BOOL performed = [handler performRequests:@[textRequest] error:&performError];
  if (!performed || nil == pieces) {
    if (errorp) {
      // Prefer the error reported to the completion handler, then the one from
      // performRequests:error:, and never return nil without an error.
      NSError *failure = recognitionError ?: performError;
      *errorp = failure ?: BKSOCRError(@"Text recognition produced no result");
    }
    return nil;
  }
  return pieces;
}

/// Converts a finished text request into BKSTextPieces and hands them to `continuation`.
///
/// `continuation` is called exactly once: with (nil, error) if `error` is set or the
/// request is not a VNRecognizeTextRequest, otherwise with (pieces, nil).
///
/// @param request The request whose results are to be converted.
/// @param error The error passed to the request's completion handler, if any.
/// @param continuation Receives either the pieces or an error.
- (void)handleTextRequst:(VNRequest *)request
                   error:(nullable NSError *)error
            continuation:(void (^)(NSArray *_Nullable idx, NSError *_Nullable error))continuation API_AVAILABLE(macos(10.15)){
  if (error) {
    continuation(nil, error);
    return;
  }
  if (![request isKindOfClass:[VNRecognizeTextRequest class]]) {
    continuation(nil, BKSOCRError(@"Unrecognized request"));
    return;
  }

  VNRecognizeTextRequest *textRequest = (VNRecognizeTextRequest *)request;
  NSMutableArray<BKSTextPiece *> *pieces = [NSMutableArray array];
  for (id rawResult in textRequest.results) {
    if (![rawResult isKindOfClass:[VNRecognizedTextObservation class]]) {
      // Log and skip anything that isn't a text observation.
      NSLog(@"E %@", rawResult);
      continue;
    }
    VNRecognizedTextObservation *observation = (VNRecognizedTextObservation *)rawResult;
    // Only the single best candidate string is kept for each observation.
    VNRecognizedText *best = [observation topCandidates:1].firstObject;
    if (nil == best) {
      continue;
    }
    BKSTextPiece *textPiece = [[BKSTextPiece alloc] init];
    textPiece.text = best.string;
    textPiece.topLeft = observation.topLeft;
    textPiece.topRight = observation.topRight;
    textPiece.bottomLeft = observation.bottomLeft;
    textPiece.bottomRight = observation.bottomRight;
    [pieces addObject:textPiece];
  }
  continuation(pieces, nil);
}

@end
NS_ASSUME_NONNULL_END
// BKSTextPiece.h
// indent using spaces, tab=2, spaces=2
//
// Created by David Phillip Oster on 4/19/20.
// Copyright © 2020 David Phillip Oster. Apache License.
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Holds one line of text returned from the BKSOCRBoss.
/// Coordinates are expressed as a ratio to the image size, with Y=0 at the bottom left.
@interface BKSTextPiece : NSObject

/// The recognized text. Copied on assignment so a caller's mutable string can't change it
/// later. Left nil by -init until assigned.
@property(nonatomic, copy) NSString *text;

/// The four corners of the text's bounding quadrilateral.
@property(nonatomic) CGPoint topLeft;
@property(nonatomic) CGPoint topRight;
@property(nonatomic) CGPoint bottomLeft;
@property(nonatomic) CGPoint bottomRight;

@end
NS_ASSUME_NONNULL_END
// BKSTextPiece.m
// indent using spaces, tab=2, spaces=2
//
// Created by David Phillip Oster on 4/19/20.
// Copyright © 2020 David Phillip Oster. Apache License.
#import "BKSTextPiece.h"
@implementation BKSTextPiece

/// Describes the piece as the average of its four corners (x then y), followed by its text.
- (NSString *)description {
  CGPoint tl = self.topLeft;
  CGPoint tr = self.topRight;
  CGPoint bl = self.bottomLeft;
  CGPoint br = self.bottomRight;
  CGFloat centerX = (tl.x + tr.x + bl.x + br.x)/4.0;
  CGFloat centerY = (tl.y + tr.y + bl.y + br.y)/4.0;
  NSString *summary = [NSString stringWithFormat:@"%3.1f %3.1f %@", centerX, centerY, self.text];
  return summary;
}

@end
// ocrImage.m
// indent using spaces, tab=2, spaces=2
//
// Created by David Phillip Oster on 4/20/20.
// Copyright © 2020 David Phillip Oster. Apache License.
#import <Cocoa/Cocoa.h>
#import "BKSTextPiece.h"
#import "BKSOCRBoss.h"
/// Returns YES if the first UTF-16 unit of `s` is a lowercase letter, NO otherwise
/// (including for an empty or nil string).
///
/// Uses NSCharacterSet rather than islower(): islower() is only defined for values
/// representable as unsigned char (or EOF), while a unichar can be any 16-bit value,
/// and narrowing its int result to BOOL can lose a non-zero result. This also
/// recognizes non-ASCII lowercase letters such as "é".
static BOOL hasInitialLowercase(NSString *s) {
  if (0 == s.length) {
    return NO;
  }
  unichar first = [s characterAtIndex:0];
  return [[NSCharacterSet lowercaseLetterCharacterSet] characterIsMember:first];
}
/// Sort function for -sortUsingFunction:context: that orders BKSTextPieces by ascending
/// bottomRight.x. The context argument is ignored.
static NSComparisonResult byEndX(id _Nonnull obj1, id _Nonnull obj2, void *unused){
  CGFloat lhs = ((BKSTextPiece *)obj1).bottomRight.x;
  CGFloat rhs = ((BKSTextPiece *)obj2).bottomRight.x;
  if (lhs < rhs) {
    return NSOrderedAscending;
  }
  if (lhs > rhs) {
    return NSOrderedDescending;
  }
  return NSOrderedSame;
}
/// Sort function for -sortUsingFunction:context: that orders BKSTextPieces by ascending
/// bottomLeft.x. The context argument is ignored.
static NSComparisonResult byStartX(id _Nonnull obj1, id _Nonnull obj2, void *unused){
  CGFloat lhs = ((BKSTextPiece *)obj1).bottomLeft.x;
  CGFloat rhs = ((BKSTextPiece *)obj2).bottomLeft.x;
  if (lhs < rhs) {
    return NSOrderedAscending;
  }
  if (lhs > rhs) {
    return NSOrderedDescending;
  }
  return NSOrderedSame;
}
/// Returns the separator to emit after `piece`: a newline (paragraph break) if the line
/// ends well short of the median right edge or is the last piece, otherwise a space.
static NSString *separatorAfterPiece(BKSTextPiece *piece, CGFloat medianEndX, BOOL isLast) {
  return (piece.bottomRight.x < medianEndX*0.9 || isLast) ? @"\n" : @" ";
}

/// Joins the recognized pieces, in their original order, into one string, guessing
/// paragraph breaks from indentation and short lines and re-joining hyphenated words.
/// Returns an empty string when there are no pieces.
static NSString *textFromPieces(NSArray<BKSTextPiece *> *pieces) {
  // An image with no recognizable text yields an empty array; indexing the median of
  // an empty array would throw.
  if (0 == pieces.count) {
    return @"";
  }
  NSMutableArray<BKSTextPiece *> *sortPieces = [pieces mutableCopy];
  // Sort by bottomRight.x to find the median right edge.
  [sortPieces sortUsingFunction:byEndX context:NULL];
  CGFloat medianEndX = sortPieces[sortPieces.count/2].bottomRight.x;
  // Sort by bottomLeft.x to find the median left edge.
  [sortPieces sortUsingFunction:byStartX context:NULL];
  CGFloat medianStartX = sortPieces[sortPieces.count/2].bottomLeft.x;

  // `a` alternates line text and the separator that follows it.
  // If a previous line is short, the current one starts a new paragraph.
  NSMutableArray<NSString *> *a = [NSMutableArray array];
  for (NSUInteger i = 0; i < pieces.count; ++i) {
    BKSTextPiece *piece = pieces[i];
    NSString *text = piece.text;
    if (nil == text) {
      continue;
    }
    NSString *separator = separatorAfterPiece(piece, medianEndX, i+1 == pieces.count);
    BOOL isIndented =
        2 < a.count && medianStartX*1.1 < piece.bottomLeft.x && !hasInitialLowercase(text);
    BOOL continuesHyphen = !isIndented && 2 < a.count && [@" " isEqual:a.lastObject] &&
        [a[a.count - 2] hasSuffix:@"-"];
    if (continuesHyphen) {
      // This isn't the first line of a paragraph and the previous line ends in '-':
      // assume it is hyphenated, delete the hyphen and join.
      NSString *lastLine = a[a.count - 2];
      a[a.count - 2] = [[lastLine substringToIndex:lastLine.length-1] stringByAppendingString:text];
      // The joined line now ends where this piece ends, so its separator must reflect
      // this piece (short line or end of page), not the previous one.
      a[a.count - 1] = separator;
    } else {
      if (isIndented) {
        // The current line is indented: make the previous separator a paragraph separator.
        a[a.count - 1] = @"\n";
      }
      [a addObject:text];
      [a addObject:separator];
    }
  }
  return [a componentsJoinedByString:@""];
}

// A command line tool that, given the path to an image file, writes the recognized text
// to the standard output as paragraphs. Exits with 0 on success and 1 on any failure
// (wrong usage, unreadable image, OCR error, or an OS older than macOS 10.15).
int main(int argc, const char * argv[]) {
  @autoreleasepool {
    // All the work is inside the @available block so the compiler can see that the
    // macOS 10.15-only API is guarded.
    if (@available(macOS 10.15, *)) {
      if (argc != 2) {
        fprintf(stderr, "Usage: %s imagefile\n", 0 < argc ? argv[0] : "ocrImage");
        return 1;
      }
      NSFileManager *fm = [NSFileManager defaultManager];
      NSString *path = [fm stringWithFileSystemRepresentation:argv[1] length:strlen(argv[1])];
      NSURL *url = [NSURL fileURLWithPath:path];
      if (nil == url) {
        fprintf(stderr, "Not found:%s.\n", argv[1]);
        return 1;
      }
      // Load the file as an image first so a non-image gets a clear message.
      NSImage *image = [[NSImage alloc] initWithContentsOfURL:url];
      if (nil == image) {
        fprintf(stderr, "Not an image:%s.\n", argv[1]);
        return 1;
      }
      BKSOCRBoss *boss = [[BKSOCRBoss alloc] init];
      NSError *error = nil;
      NSArray<BKSTextPiece *> *pieces = [boss recognizeImageURL:url error:&error];
      if (nil == pieces) {
        const char *reason = error.localizedDescription.UTF8String;
        fprintf(stderr, "OCR failed:%s.\n", reason ? reason : "unknown error");
        return 1;
      }
      printf("%s", [textFromPieces(pieces) UTF8String]);
      return 0;
    } else {
      fprintf(stderr, "Required macOS 10.15 or newer.\n");
      return 1;
    }
  }
  return 0;
}
ocrImage
is open source under the Apache license.
Version 1.0 - initial release
Version 1.0.1 - added complete source as a web page
Apple's Vision framework requires macOS 10.15 (Catalina) or newer.
ocrImage - main ocrImage page.
Page last modified 4/23/2020