Hello,
I need help implementing text detection with Vision + Core ML in Pythonista.
Does anyone have an example?
Regards.
@pavlinb see here
Try the script with a photo of a single letter, with this .mlmodel to be downloaded.
Two little modifications in the script:
#MODEL_FILENAME = 'mobilenet.mlmodel'
MODEL_FILENAME = 'Alphanum_28x28.mlmodel'
all_assets = photos.get_assets()
asset = photos.pick_asset(assets=all_assets)
#asset = photos.pick_asset()

Thank you for the fast response. My inspiration came from exactly that link.
Do you know the proper way of using VNDetectTextRectanglesRequest to obtain text from an image in Pythonista?
Kind regards.
@pavlinb try this quick and dirty (but working :-)) code on an image containing text
# from https://github.com/SwiftBrain/HelloVision/blob/master/HelloVision/TextDetectionViewController.swift
from objc_util import *
import photos
import ui

VNImageRequestHandler = ObjCClass('VNImageRequestHandler')
VNDetectTextRectanglesRequest = ObjCClass('VNDetectTextRectanglesRequest')

def classify_asset(asset):
    img_data = ns(asset.get_image_data().getvalue())
    req = VNDetectTextRectanglesRequest.alloc().init()
    handler = VNImageRequestHandler.alloc().initWithData_options_(img_data, None).autorelease()
    success = handler.performRequests_error_([req], None)
    if success:
        im = ui.ImageView()
        ui_image = asset.get_ui_image()
        wi, hi = ui_image.size
        im.frame = (0, 0, 400, 400*hi/wi)
        wi = im.width
        hi = im.height
        im.image = ui_image
        im.present('sheet')
        for i in range(0, len(req.results())):
            observation = req.results()[i]
            box = observation.boundingBox()
            x = box.origin.x
            y = box.origin.y
            w = box.size.width
            h = box.size.height
            print('x=', x)
            print('y=', y)
            print('width=', w)
            print('height=', h)
            l = ui.Label()
            l.frame = (x*wi, y*hi, w*wi, h*hi)
            print(l.frame)
            l.border_width = 1
            l.border_color = 'red'
            im.add_subview(l)
            #print(dir(observation.boundingBox()))
            confidence = observation.confidence()
            print('confidence', confidence)
        print('ok')
    else:
        print('error')

def main():
    all_assets = photos.get_assets()
    asset = photos.pick_asset(assets=all_assets)
    if asset is None:
        return
    classify_asset(asset)

if __name__ == '__main__':
    main()
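A note on the boundingBox values: Vision returns them normalized to 0..1 with the origin in the bottom-left corner, while ui views use a top-left origin, so strictly speaking the y value should be flipped before drawing (the quick-and-dirty code above skips this, which is one reason boxes can appear shifted). A minimal sketch of such a conversion, with a helper name of my own:

def vision_box_to_ui_frame(box, view_w, view_h):
    # box is the CGRect from observation.boundingBox():
    # normalized 0..1 coordinates, origin at the bottom-left.
    # ui.View frames use points with the origin at the top-left, so flip y.
    x = box.origin.x * view_w
    y = (1.0 - box.origin.y - box.size.height) * view_h
    w = box.size.width * view_w
    h = box.size.height * view_h
    return (x, y, w, h)

You could then set l.frame = vision_box_to_ui_frame(observation.boundingBox(), wi, hi) instead of computing the frame inline.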

Cool. Thanks.
But is there any way to guess the proper calling conventions of native iOS frameworks in Pythonista?
There are a lot of functions from Vision that I want to test...
Regards.
@pavlinb examples, experience, and the forum :-)
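One trick that helps with the guessing: objc_util objects support dir(), so you can list the Objective-C selectors of a request or of the observations it returns and search for the one you need (the print(dir(...)) lines in the scripts in this thread do exactly that). A minimal sketch:

from objc_util import ObjCClass

# list the selectors Vision exposes on a request object
req = ObjCClass('VNDetectTextRectanglesRequest').alloc().init()
print([m for m in dir(req) if 'character' in m.lower()])  # e.g. reportCharacterBoxes, setReportCharacterBoxes_

The same works on the observation objects inside req.results(), which is how you can discover methods like boundingBox() or characterBoxes() without leaving Pythonista.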
@cvp said:
Try the script with a photo of a single letter, with this .mlmodel to be downloaded.
Two little modifications in the script:
#MODEL_FILENAME = 'mobilenet.mlmodel'
MODEL_FILENAME = 'Alphanum_28x28.mlmodel'
all_assets = photos.get_assets()
asset = photos.pick_asset(assets=all_assets)
#asset = photos.pick_asset()
This example works well on one character. Is there a way to make it work on more symbols?
@pavlinb Try this script, and then try to combine it with the mlmodel script
# from https://github.com/SwiftBrain/HelloVision/blob/master/HelloVision/TextDetectionViewController.swift
from objc_util import *
import photos
import ui

VNImageRequestHandler = ObjCClass('VNImageRequestHandler')
VNDetectTextRectanglesRequest = ObjCClass('VNDetectTextRectanglesRequest')

def classify_asset(asset):
    img_data = ns(asset.get_image_data().getvalue())
    req = VNDetectTextRectanglesRequest.alloc().init()
    req.reportCharacterBoxes = True
    handler = VNImageRequestHandler.alloc().initWithData_options_(img_data, None).autorelease()
    success = handler.performRequests_error_([req], None)
    if success:
        im = ui.ImageView()
        ui_image = asset.get_ui_image()
        wi, hi = ui_image.size
        im.frame = (0, 0, 400, 400*hi/wi)
        wi = im.width
        hi = im.height
        im.image = ui_image
        im.present('sheet')
        for i in range(0, len(req.results())):
            observation = req.results()[i]
            box = observation.boundingBox()
            x = box.origin.x
            y = box.origin.y
            w = box.size.width
            h = box.size.height
            print('x=', x)
            print('y=', y)
            print('width=', w)
            print('height=', h)
            l = ui.Label()
            l.frame = (x*wi, y*hi, w*wi, h*hi)
            print(l.frame)
            l.border_width = 1
            l.border_color = 'red'
            im.add_subview(l)
            print(dir(observation))
            confidence = observation.confidence()
            print('confidence', confidence)
            for i_ch in range(0, len(observation.characterBoxes())):
                ch_box = observation.characterBoxes()[i_ch]
                box = ch_box.boundingBox()
                x = box.origin.x
                y = box.origin.y
                w = box.size.width
                h = box.size.height
                print('x=', x)
                print('y=', y)
                print('width=', w)
                print('height=', h)
                l = ui.Label()
                l.frame = (x*wi, y*hi, w*wi, h*hi)
                print(l.frame)
                l.border_width = 1
                l.border_color = 'blue'
                im.add_subview(l)
                #print(dir(ch_box))
                #break
        print('ok')
    else:
        print('error')

def main():
    all_assets = photos.get_assets()
    asset = photos.pick_asset(assets=all_assets)
    if asset is None:
        return
    classify_asset(asset)

if __name__ == '__main__':
    main()
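To actually feed each detected character to an mlmodel (what the next script does), the normalized character box has to be converted to pixel coordinates for PIL.Image.crop, remembering both the 0..1 normalization and Vision's bottom-left origin versus PIL's top-left origin. A minimal sketch, with a helper name of my own (pil_image would be asset.get_image()):

def crop_char(pil_image, box):
    # box: normalized CGRect from a character box observation (origin at bottom-left)
    w_px, h_px = pil_image.size
    left = int(box.origin.x * w_px)
    right = int((box.origin.x + box.size.width) * w_px)
    # flip y because PIL uses a top-left origin
    top = int((1.0 - box.origin.y - box.size.height) * h_px)
    bottom = int((1.0 - box.origin.y) * h_px)
    return pil_image.crop((left, top, right, bottom))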

@pavlinb it should work, but the result is not correct :-(
#!python3
'''
This is a demo of how you can use the CoreML framework (via objc_util) to classify images in Pythonista. It downloads the trained 'MobileNet' CoreML model from the Internet, and uses it to classify images that are either taken with the camera, or picked from the photo library.
'''
import requests
import os
import io
import photos
import dialogs
from PIL import Image
from objc_util import ObjCClass, nsurl, ns
import ui

# Configuration (change URL and filename if you want to use a different model):
MODEL_URL = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel'
MODEL_FILENAME = 'Alphanum_28x28.mlmodel'
# Use a local path for caching the model file (no need to sync this with iCloud):
MODEL_PATH = os.path.join(os.path.expanduser('~/Documents'), MODEL_FILENAME)

# Declare/import ObjC classes:
MLModel = ObjCClass('MLModel')
VNCoreMLModel = ObjCClass('VNCoreMLModel')
VNCoreMLRequest = ObjCClass('VNCoreMLRequest')
VNImageRequestHandler = ObjCClass('VNImageRequestHandler')
VNDetectTextRectanglesRequest = ObjCClass('VNDetectTextRectanglesRequest')

def load_model():
    '''Helper method for downloading/caching the mlmodel file'''
    if not os.path.exists(MODEL_PATH):
        print(f'Downloading model: {MODEL_FILENAME}...')
        r = requests.get(MODEL_URL, stream=True)
        file_size = int(r.headers['content-length'])
        with open(MODEL_PATH, 'wb') as f:
            bytes_written = 0
            for chunk in r.iter_content(1024*100):
                f.write(chunk)
                print(f'{bytes_written/file_size*100:.2f}% downloaded')
                bytes_written += len(chunk)
        print('Download finished')
    ml_model_url = nsurl(MODEL_PATH)
    # Compile the model:
    c_model_url = MLModel.compileModelAtURL_error_(ml_model_url, None)
    # Load model from the compiled model file:
    ml_model = MLModel.modelWithContentsOfURL_error_(c_model_url, None)
    # Create a VNCoreMLModel from the MLModel for use with the Vision framework:
    vn_model = VNCoreMLModel.modelForMLModel_error_(ml_model, None)
    return vn_model

def _classify_img_data(img_data):
    '''The main image classification method, used by `classify_image` (for camera images) and `classify_asset` (for photo library assets).'''
    vn_model = load_model()
    # Create and perform the recognition request:
    req = VNCoreMLRequest.alloc().initWithModel_(vn_model).autorelease()
    handler = VNImageRequestHandler.alloc().initWithData_options_(img_data, None).autorelease()
    success = handler.performRequests_error_([req], None)
    if success:
        best_result = req.results()[0]
        label = str(best_result.identifier())
        confidence = best_result.confidence()
        return {'label': label, 'confidence': confidence}
    else:
        return None

def classify_image(img):
    buffer = io.BytesIO()
    img.save(buffer, 'JPEG')
    img_data = ns(buffer.getvalue())
    return _classify_img_data(img_data)

def classify_asset(asset):
    img_data = ns(asset.get_image_data().getvalue())
    req = VNDetectTextRectanglesRequest.alloc().init()
    req.reportCharacterBoxes = True
    handler = VNImageRequestHandler.alloc().initWithData_options_(img_data, None).autorelease()
    success = handler.performRequests_error_([req], None)
    if success:
        im = ui.ImageView()
        pil_image = asset.get_image()
        print(pil_image.size)
        ui_image = asset.get_ui_image()
        wim, him = ui_image.size
        im.frame = (0, 0, 400, 400*him/wim)
        wi = im.width
        hi = im.height
        im.image = ui_image
        im.present('sheet')
        for i in range(0, len(req.results())):
            observation = req.results()[i]
            box = observation.boundingBox()
            xb = box.origin.x
            yb = box.origin.y
            wb = box.size.width
            hb = box.size.height
            #print('x=', xb)
            #print('y=', yb)
            #print('width=', wb)
            #print('height=', hb)
            l = ui.Label()
            l.frame = (xb*wi, yb*hi, wb*wi, hb*hi)
            #print(l.frame)
            #l.border_width = 1
            #l.border_color = 'red'
            im.add_subview(l)
            #print(dir(observation))
            confidence = observation.confidence()
            #print('confidence', confidence)
            for i_ch in range(0, len(observation.characterBoxes())):
                ch_box = observation.characterBoxes()[i_ch]
                box = ch_box.boundingBox()
                x = box.origin.x
                y = box.origin.y
                w = box.size.width
                h = box.size.height
                #print('x=', x)
                #print('y=', y)
                #print('width=', w)
                #print('height=', h)
                l = ui.Label()
                l.frame = (x*wi, yb*hi, w*wi, hb*hi)
                #print(l.frame)
                #l.border_width = 1
                #l.border_color = 'blue'
                im.add_subview(l)
                print((int(x*wim), int(yb*him), int(w*wim), int(hb*him)))
                pil_char = pil_image.crop((int(x*wim)-1, int(yb*him)-1, int((x+w)*wim)+1, int((yb+hb)*him)+1))
                pil_char.show()
                print(classify_image(pil_char))
                #print(dir(ch_box))
                #break
        print('ok')
    else:
        print('error')

def main():
    all_assets = photos.get_assets()
    asset = photos.pick_asset(assets=all_assets)
    if asset is None:
        return
    classify_asset(asset)

if __name__ == '__main__':
    main()

@pavlinb install this model OCR.mlmodel
and my script works almost perfectly (it reads F instead of E)

You are very kind, thanks.
You gave me a lot of material to experiment with.
Are you using it on an iPad?
@pavlinb yes sir, iPad mini 4
Do you know of other mlmodels for OCR?
Regards.
Ah, sorry then. You answered so fast that I thought you were a master of mlmodels.
Thanks anyway for the links.
@pavlinb I knew the omz script, and I found and converted the Swift code when I read your post.
Can you help me with one issue I can't fix on an iPhone 8?
link text
@pavlinb that's what we see 
Please post the script

Sorry,
Click on the green button (the label is in Cyrillic).
@pavlinb I did, and after a few prompts I got the screenshot...
Here the blue boxes are shifted relative to the original text.
#!python3
'''
This is a demo of how you can use the CoreML framework (via objc_util) to classify images in Pythonista.
It downloads the trained 'MobileNet' CoreML model from the Internet, and uses it to classify images that
are either taken with the camera, or picked from the photo library.
'''
import requests
import os
import io
import photos
import dialogs
from PIL import Image
from objc_util import ObjCClass, nsurl, ns
import ui

# Configuration (change URL and filename if you want to use a different model):
MODEL_URL = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel'
#MODEL_FILENAME = 'Alphanum_28x28.mlmodel'
MODEL_FILENAME = 'OCR.mlmodel'
#MODEL_FILENAME = 'frozen_east_text_detection.pb.py'
# Use a local path for caching the model file (no need to sync this with iCloud):
MODEL_PATH = os.path.join(os.path.expanduser('~/Documents'), MODEL_FILENAME)

# Declare/import ObjC classes:
MLModel = ObjCClass('MLModel')
VNCoreMLModel = ObjCClass('VNCoreMLModel')
VNCoreMLRequest = ObjCClass('VNCoreMLRequest')
VNImageRequestHandler = ObjCClass('VNImageRequestHandler')
VNDetectTextRectanglesRequest = ObjCClass('VNDetectTextRectanglesRequest')

def load_model():
    '''Helper method for downloading/caching the mlmodel file'''
    if not os.path.exists(MODEL_PATH):
        print(f'Downloading model: {MODEL_FILENAME}...')
        r = requests.get(MODEL_URL, stream=True)
        file_size = int(r.headers['content-length'])
        with open(MODEL_PATH, 'wb') as f:
            bytes_written = 0
            for chunk in r.iter_content(1024*100):
                f.write(chunk)
                print(f'{bytes_written/file_size*100:.2f}% downloaded')
                bytes_written += len(chunk)
        print('Download finished')
    ml_model_url = nsurl(MODEL_PATH)
    # Compile the model:
    c_model_url = MLModel.compileModelAtURL_error_(ml_model_url, None)
    # Load model from the compiled model file:
    ml_model = MLModel.modelWithContentsOfURL_error_(c_model_url, None)
    # Create a VNCoreMLModel from the MLModel for use with the Vision framework:
    vn_model = VNCoreMLModel.modelForMLModel_error_(ml_model, None)
    return vn_model

def _classify_img_data(img_data):
    '''The main image classification method, used by `classify_image` (for camera images) and `classify_asset` (for photo library assets).'''
    vn_model = load_model()
    # Create and perform the recognition request:
    req = VNCoreMLRequest.alloc().initWithModel_(vn_model).autorelease()
    handler = VNImageRequestHandler.alloc().initWithData_options_(img_data, None).autorelease()
    success = handler.performRequests_error_([req], None)
    if success:
        best_result = req.results()[0]
        label = str(best_result.identifier())
        confidence = best_result.confidence()
        return {'label': label, 'confidence': confidence}
    else:
        return None

def classify_image(img):
    buffer = io.BytesIO()
    img.save(buffer, 'JPEG')
    img_data = ns(buffer.getvalue())
    return _classify_img_data(img_data)

def classify_asset(asset):
    img_data = ns(asset.get_image_data().getvalue())
    req = VNDetectTextRectanglesRequest.alloc().init()
    req.reportCharacterBoxes = True
    handler = VNImageRequestHandler.alloc().initWithData_options_(img_data, None).autorelease()
    success = handler.performRequests_error_([req], None)
    if success:
        im = ui.ImageView()
        pil_image = asset.get_image()
        print(pil_image.size)
        ui_image = asset.get_ui_image()
        wim, him = ui_image.size
        im.frame = (0, 0, 400, 400*him/wim)
        #im.frame = (0, 0, 141, 64)
        wi = im.width
        hi = im.height
        im.image = ui_image
        im.content_mode = 1 #1
        im.present()
        for i in range(0, len(req.results())):
            observation = req.results()[i]
            box = observation.boundingBox()
            xb = box.origin.x
            yb = box.origin.y
            wb = box.size.width
            hb = box.size.height
            #print('x=', xb)
            #print('y=', yb)
            #print('width=', wb)
            #print('height=', hb)
            l = ui.Label()
            l.frame = (xb*wi, yb*hi, wb*wi, hb*hi)
            #print(l.frame)
            #l.border_width = 1
            #l.border_color = 'red'
            im.add_subview(l)
            #print(dir(observation))
            confidence = observation.confidence()
            #print('confidence', confidence)
            for i_ch in range(0, len(observation.characterBoxes())):
                ch_box = observation.characterBoxes()[i_ch]
                box = ch_box.boundingBox()
                x = box.origin.x
                y = box.origin.y
                w = box.size.width
                h = box.size.height
                #print('x=', x)
                #print('y=', y)
                #print('width=', w)
                #print('height=', h)
                l = ui.Label()
                l.frame = (x*wi, yb*hi, w*wi, hb*hi)
                #print(l.frame)
                l.border_width = 1
                l.border_color = 'blue'
                im.add_subview(l)
                print((int(x*wim), int(yb*him), int(w*wim), int(hb*him)))
                pil_char = pil_image.crop((int(x*wim)-1, int(yb*him)-1, int((x+w)*wim)+1, int((yb+hb)*him)+8))
                pil_char.show()
                print(classify_image(pil_char))
                #print(dir(ch_box))
                #break
        print('ok')
    else:
        print('error')

def main():
    all_assets = photos.get_assets()
    asset = photos.pick_asset(assets=all_assets)
    if asset is None:
        return
    classify_asset(asset)

if __name__ == '__main__':
    main()
@pavlinb I guess you changed my script a little...
Yes, a little.
@pavlinb the problem comes from the fact that you did not present as 'sheet'.
Then the width and height of the ImageView are not the ones we set at the beginning, easy isn't it? :-)
With 'sheet' even the original image is extended to fullscreen. And according to the Pythonista documentation, 'sheet' works on iPad only.
https://imgur.com/a/ogxpMPR
Sorry, as I work on an iPad, I always forget that 'sheet' does not exist on iPhone; this should be OK:
mv = ui.View()
im = ui.ImageView()
pil_image = asset.get_image()
print(pil_image.size)
ui_image = asset.get_ui_image()
wim, him = ui_image.size
ws, hs = ui.get_screen_size()
if (ws/hs) > (wim/him):
    h = ws*him/wim
    im.frame = (0, (hs-h)/2, ws, h)
else:
    w = hs*wim/him
    im.frame = ((ws-w)/2, 0, w, hs)
mv.add_subview(im)
wi = im.width
hi = im.height
im.image = ui_image
im.content_mode = 1  # ui.CONTENT_SCALE_ASPECT_FIT
mv.present()
@pavlinb Tried?
@pavlinb did you try to replace
im = ui....
.
.
.
im.present()
by the lines just above?
Hi, it looks like this:

@pavlinb for me, with 'full_screen'

Did you try it on more natural images?
@pavlinb no, sincerely, I don't know anything about mlmodels. When I read your topic, I only remembered an omz script, and I tried to add the Objective-C code for your request about VNDetectTextRectanglesRequest. That's all, folks :-)
Here is why it doesn't work for me:
@pavlinb Imgur answer...
Oops! We couldn't find that page
Ok now
@pavlinb As I don't have this iPhone, could you print wim, him, ws, hs so I could simulate? Thanks.
@pavlinb could you also post the analyzed image in Imgur?
@cvp wim = 141, him = 64 , ws = 375, hs = 667

@pavlinb did you not rotate your iPhone during the test?
I simulated your dimensions and I got this.
I agree that the blue rectangles are not very good, but that's how it is, I think, with this code

@cvp I'll play with dimensions.
@cvp In landscape the image looks better, but in portrait it is stretched and goes outside the screen.
@pavlinb It is strange, because I simulated your iPhone in portrait mode.
I think I made an error, try < instead of >, sorry :-(
if (ws/hs) < (wim/him):
@cvp Much better.
Only the blue boxes are a little shifted.
But for now it's not important.
It seems that VNRecognizeTextRequest exists in iOS 13.0+ beta, according to this link.
If I upgrade to the iOS 13 beta, should I be able to call it from Pythonista?
@pavlinb No idea, sorry. I think that yes, you could.
@pavlinb ask somebody who has installed the iOS 13 beta to test:
from objc_util import *
VNRecognizeTextRequest = ObjCClass('VNRecognizeTextRequest').alloc()
If it runs without error, that should be OK for further development.
If you get
ValueError: no Objective-C class named 'b'VNRecognizeTextRequest'' found
It is not good...
@cvp It compiled without error on the iOS13_7 beta.
Let's see if it works well.
@pavlinb :-)
@cvp It works well, and no models are needed.
@pavlinb Super. I'm waiting for the official iOS 13...
@cvp, me too, and now much more eagerly. Thanks @pavlinb for the testing.
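For reference, a minimal sketch of what a complete VNRecognizeTextRequest call could look like from Pythonista on iOS 13+ (only the class lookup was tested above; the method names follow the documented Vision API, so treat this as a starting point rather than a verified script):

from objc_util import ObjCClass, ns
import photos

VNImageRequestHandler = ObjCClass('VNImageRequestHandler')
VNRecognizeTextRequest = ObjCClass('VNRecognizeTextRequest')

def recognize_text(asset):
    img_data = ns(asset.get_image_data().getvalue())
    req = VNRecognizeTextRequest.alloc().init().autorelease()
    handler = VNImageRequestHandler.alloc().initWithData_options_(img_data, None).autorelease()
    if handler.performRequests_error_([req], None):
        for observation in req.results():
            # each result is a VNRecognizedTextObservation;
            # topCandidates_(1) returns the best VNRecognizedText candidate
            candidate = observation.topCandidates_(1)[0]
            print(str(candidate.string()), candidate.confidence())

asset = photos.pick_asset(assets=photos.get_assets())
if asset is not None:
    recognize_text(asset)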
Is there a way to process every frame from the camera, not only photos from the gallery?
@pavlinb I think there is a topic about that in the forum, but I'm not sure
Not exactly what you want but..... see here
@cvp Checking now...
@cvp said:
@pavlinb I knew the omz script, and I found and converted the Swift code when I read your post.
Do you have a converter from Swift to Python, or did you do it manually?
@pavlinb Manually :-( and I prefer converting Objective-C rather than Swift...
@cvp I found a project in Swift that is good for detecting in real time. But it is hard for me to translate it to Python.
@pavlinb You can post the URL, but I don't promise anything if it's too long or too complex for me.
@cvp https://developer.apple.com/documentation/vision/recognizing_objects_in_live_capture
There is download button at the top.
@pavlinb I think you (we?) could do it, starting from @JonB's script https://github.com/jsbain/objc_hacks/blob/master/live_camera_view.py
@cvp I thought the same :-)
@pavlinb First draft here.
The back camera on my iPad mini 4 does not work; I don't know why.
@cvp In class LiveCameraView(ui.View), the docstring says
''' device=1 == front, device=2 == back
but the back camera is device=0.
@pavlinb Thanks, gist corrected
@cvp A big step in right direction. Thanks.
@pavlinb The next step is to integrate AVCaptureVideoDataOutput from the example here.
If you follow the logic of this code, you will come across the UIImageFromSampleBuffer function,
which is very, very complex (for me, at least).
@cvp My fifty cents link
@pavlinb Super. Do you think I should try to intercept real camera frames, or are you happy with this way of taking still photos?
@pavlinb On my (old) iPad mini 4, the script takes 2 photos per second, far from real-time frame processing...
@cvp For my current needs it's perfect.
@pavlinb :-)
@cvp But it would be good if I could avoid the shutter sound. I assume it is caused by the method used to capture frames?
@pavlinb for info, AVCaptureStillImageOutput is deprecated, we should use AVCapturePhotoOutput
@pavlinb said:
But it would be good if I could avoid the shutter sound. I assume it is caused by the method used to capture frames?
Following this,
I tried this, without success:
c.AudioServicesDisposeSystemSoundID(1108,restype=None, argtypes=[c_int32]) # 1108 = shutter sound
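For what it's worth, with ctypes the restype/argtypes are normally set on the function object before the call, rather than passed as keyword arguments, so the usual pattern would be something like the sketch below (it may still not silence the shutter, as noted):

from objc_util import c
from ctypes import c_int32, c_uint32

# configure the C function signature first, then call it
c.AudioServicesDisposeSystemSoundID.restype = c_int32      # OSStatus
c.AudioServicesDisposeSystemSoundID.argtypes = [c_uint32]  # SystemSoundID
status = c.AudioServicesDisposeSystemSoundID(1108)         # 1108 = camera shutter sound
print('OSStatus:', status)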
@cvp said:
@pavlinb The next step is to integrate AVCaptureVideoDataOutput from the example here.
If you follow the logic of this code, you will come across the UIImageFromSampleBuffer function,
which is very, very complex (for me, at least).
Hi, did you manage to run the given code? It seems it contains a number of functions (ready to use or not).
Regards.
@pavlinb No, sorry.
Which one of the code examples in this thread is the best one to copy and paste onto my iPad mini 2 for text detection, to get the starting numbers of a sudoku puzzle by taking a picture of the puzzle in a newspaper?
https://forum.omz-software.com/topic/3050/snapsudoku
@sodoku I'm sorry, but my iPad mini 4 is on iOS 13 with the latest Pythonista beta, and my little script does not work very well any more...
@sodoku This is the shortest code I can give, without first searching for the rectangles containing text, but assuming the image is a square grid with only one digit per cell.
You have to set (or ask, up to you) the number of cells per row/column, here 9, and the percentage (here 15) of the cell dimensions taken up by the grid lines. Even if the image seems to be entirely displayed, it is not, because it is overlaid by buttons whose background_image is the cropped cell that is passed to the VNCoreMLRequest to try to recognize the digit.
And, as you can see in my example, it does not work correctly on my iPad mini 4 under iOS 13 and the latest Pythonista beta. You could try it on your device, but I can't help more, sorry. Hoping it will work better for you :-(
import os
import io
import photos
import dialogs
from PIL import Image
from objc_util import ObjCClass, ObjCInstance, nsurl, ns
import ui

MODEL_FILENAME = 'MNIST.mlmodel'
#MODEL_FILENAME = 'MNISTClassifier.mlmodel'
#MODEL_FILENAME = 'OCR.mlmodel'
#MODEL_FILENAME = 'Alphanum_28x28.mlmodel'
# Use a local path for caching the model file
MODEL_PATH = os.path.join(os.path.expanduser('~/Documents/'), MODEL_FILENAME)

# Declare/import ObjC classes:
MLModel = ObjCClass('MLModel')
VNCoreMLModel = ObjCClass('VNCoreMLModel')
VNCoreMLRequest = ObjCClass('VNCoreMLRequest')
VNImageRequestHandler = ObjCClass('VNImageRequestHandler')

def pil2ui(imgIn):
    # convert a PIL image to a ui.Image via an in-memory PNG
    with io.BytesIO() as bIO:
        imgIn.save(bIO, 'PNG')
        imgOut = ui.Image.from_data(bIO.getvalue())
    return imgOut

def load_model():
    global vn_model
    ml_model_url = nsurl(MODEL_PATH)
    # Compile the model:
    c_model_url = MLModel.compileModelAtURL_error_(ml_model_url, None)
    # Load model from the compiled model file:
    ml_model = MLModel.modelWithContentsOfURL_error_(c_model_url, None)
    # Create a VNCoreMLModel from the MLModel for use with the Vision framework:
    vn_model = VNCoreMLModel.modelForMLModel_error_(ml_model, None)
    return vn_model

def _classify_img_data(img_data):
    global vn_model
    # Create and perform the recognition request:
    req = VNCoreMLRequest.alloc().initWithModel_(vn_model).autorelease()
    handler = VNImageRequestHandler.alloc().initWithData_options_(img_data, None).autorelease()
    success = handler.performRequests_error_([req], None)
    if success:
        best_result = req.results()[0]
        label = str(best_result.identifier())
        confidence = best_result.confidence()
        return {'label': label, 'confidence': confidence}
    else:
        return None

def classify_image(img):
    buffer = io.BytesIO()
    img.save(buffer, 'JPEG')
    img_data = ns(buffer.getvalue())
    return _classify_img_data(img_data)

def classify_asset(asset):
    mv = ui.View()
    mv.background_color = 'white'
    im = ui.ImageView()
    pil_image = asset.get_image()
    print(pil_image.size)
    ui_image = asset.get_ui_image()
    n_squares = 9
    d_grid = 15  # % of the cell taken by the grid lines around the digit
    wim, him = pil_image.size
    ws, hs = ui.get_screen_size()
    if (ws/hs) < (wim/him):
        h = ws*him/wim
        im.frame = (0, (hs-h)/2, ws, h)
    else:
        w = hs*wim/him
        im.frame = ((ws-w)/2, 0, w, hs)
    print(wim, him, ws, hs)
    mv.add_subview(im)
    wi = im.width
    hi = im.height
    im.image = ui_image
    im.content_mode = 1  # ui.CONTENT_SCALE_ASPECT_FIT
    mv.frame = (0, 0, ws, hs)
    mv.present('fullscreen')
    dx = wim/n_squares
    dy = him/n_squares
    d = dx*d_grid/100
    dl = int((wi/n_squares)*d_grid/100)
    for ix in range(n_squares):
        x = ix*dx
        for iy in range(n_squares):
            y = iy*dy
            pil_char = pil_image.crop((int(x+d), int(y+d), int(x+dx-d), int(y+dy-d)))
            l = ui.Button()
            l.frame = (int(ix*wi/n_squares)+dl, int(iy*hi/n_squares)+dl, int(wi/n_squares)-2*dl, int(hi/n_squares)-2*dl)
            l.border_width = 1
            l.border_color = 'red'
            l.tint_color = 'red'
            ObjCInstance(l).button().contentHorizontalAlignment = 1  # left
            l.background_image = pil2ui(pil_char)
            im.add_subview(l)
            l.title = classify_image(pil_char)['label']

def main():
    global vn_model
    vn_model = load_model()
    all_assets = photos.get_assets()
    asset = photos.pick_asset(assets=all_assets)
    if asset is None:
        return
    classify_asset(asset)

if __name__ == '__main__':
    main()
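Note that, unlike the earlier scripts, this one does not download the model: it assumes MNIST.mlmodel is already in ~/Documents. A minimal sketch of a download step, with a placeholder URL (use whatever source you have for the model):

import os
import requests

MNIST_URL = 'https://example.com/MNIST.mlmodel'  # placeholder, not a real link
MNIST_PATH = os.path.join(os.path.expanduser('~/Documents'), 'MNIST.mlmodel')

if not os.path.exists(MNIST_PATH):
    r = requests.get(MNIST_URL)
    r.raise_for_status()
    with open(MNIST_PATH, 'wb') as f:
        f.write(r.content)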

@cvp Cool looking output! I am trying to follow along at home but have a few questions:
1. What image (of a sudoku puzzle) did you start with?
2. How did you constrain the labels to just 0-9?
3. Could we have a GitHub repo for this effort? With goals:
    1. Recognize sudoku digits from a still image
    2. Recognize sudoku digits from a real-time image
@ccc
1. I used an image of a sudoku grid from a Google search, here
2. Use of MNIST.mlmodel, for digits only
3. Not sure it would be that interesting, because iOS 13 now offers a better way via VNRecognizeTextRequest, but it was only to answer @sodoku's question
See the topic with @mikael's code

@ccc Do you have an iDevice on iOS < 13, just to know if digit recognition works there? Because my test above is not OK under iOS 13.
@ccc Really only because you asked :-) GitHub
For non-Pythonista platforms... https://github.com/neeru1207/AI_Sudoku
Vision OCR produces pretty good results.
But has anyone succeeded in OCRing text with subscripts or exponent (power) signs?
(https://commons.wikimedia.org/wiki/File:Quadratic-formula.jpg)
What about OCR in real-time video mode? Is it possible?
See this topic: https://forum.omz-software.com/topic/6016/recognize-text-from-picture