Skip to content

Commit

Permalink
✨ Add OCR common field (#37)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastianMindee committed Jun 19, 2023
1 parent fb61020 commit fb55a57
Show file tree
Hide file tree
Showing 7 changed files with 278 additions and 3 deletions.
56 changes: 56 additions & 0 deletions lib/mindee/geometry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@
module Mindee
# Various helper functions for geometry.
module Geometry
# Holds the lowest and the highest value of a series.
class MinMax
  # @!attribute [r] min
  #   @return [Float] lowest value
  # @!attribute [r] max
  #   @return [Float] highest value
  attr_reader :min, :max

  # @param min [Float] lowest value
  # @param max [Float] highest value
  def initialize(min, max)
    @max = max
    @min = min
  end
end

# A relative set of coordinates (X, Y) on the document.
class Point
# @return [Float]
Expand Down Expand Up @@ -71,7 +88,20 @@ def [](key)
end
end

# An ordered list of vertices (Points) describing a shape on the document.
class Polygon < Array
  # Central point of the polygon, as computed by Geometry.get_centroid.
  def centroid
    Geometry.get_centroid(self)
  end

  # Tell whether the point's Y coordinate lies between the lowest and the
  # highest Y of the polygon's vertices (both bounds included).
  # @param point [Mindee::Geometry::Point]
  # @return [Boolean]
  def point_in_y?(point)
    y_bounds = Geometry.get_min_max_y(self)
    return false unless y_bounds.min <= point.y

    point.y <= y_bounds.max
  end
end

# Transform a prediction into a Quadrilateral.
Expand Down Expand Up @@ -114,5 +144,31 @@ def self.get_bounding_box(vertices)
Point.new(x_min, y_max)
)
end

# Get the central point (centroid) given a sequence of points.
#
# The centroid is the arithmetic mean of the X and of the Y coordinates.
# Float division is always used, so coordinates that are all Integers
# (e.g. `0` and `1` as parsed from JSON) are not truncated.
# @param points [Array<Mindee::Geometry::Point>]
# @return [Mindee::Geometry::Point]
# @raise [ArgumentError] if `points` is empty
def self.get_centroid(points)
  vertices_count = points.size
  raise ArgumentError, 'Cannot compute the centroid of an empty set of points' if vertices_count.zero?

  x_sum = points.sum(&:x)
  y_sum = points.sum(&:y)
  Point.new(x_sum.fdiv(vertices_count), y_sum.fdiv(vertices_count))
end

# Find the lowest and the highest Y coordinate among a sequence of points.
# @param points [Array<Mindee::Geometry::Point>]
# @return [Mindee::Geometry::MinMax]
def self.get_min_max_y(points)
  lowest, highest = points.map(&:y).minmax
  MinMax.new(lowest, highest)
end

# Find the lowest and the highest X coordinate among a sequence of points.
# @param points [Array<Mindee::Geometry::Point>]
# @return [Mindee::Geometry::MinMax]
def self.get_min_max_x(points)
  lowest, highest = points.map(&:x).minmax
  MinMax.new(lowest, highest)
end
end
end
7 changes: 5 additions & 2 deletions lib/mindee/parsing/api_response.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,14 @@ class ApiResponse
# Builds the response from the parsed HTTP payload: `@api_request`, `@document`
# and `@job` are set from the matching top-level keys when they are present.
# @param prediction_class [Class<Mindee::Prediction::Prediction>]
# @param http_response [Hash]
def initialize(prediction_class, http_response)
# NOTE(review): this listing is a rendered commit diff (5 additions & 2 deletions for this file).
# The single-line condition below looks like the removed line shown next to its
# multi-line replacement -- confirm against the checked-out file.
if http_response.key?('document') && (!http_response.key?('job') || http_response['job']['status'] == 'completed')
# Parsed before the document so that its status can be used in the condition below.
@api_request = Mindee::ApiRequest.new(http_response['api_request']) if http_response.key?('api_request')
# The document is only built when a 'document' key exists, there is no job or the job
# status is 'completed', and the request status is SUCCESS.
# NOTE(review): `@api_request` is only assigned when the 'api_request' key exists, so
# `@api_request.status` would be called on nil otherwise -- confirm the key is always present.
if http_response.key?('document') &&
(!http_response.key?('job') ||
http_response['job']['status'] == 'completed') &&
@api_request.status == RequestStatus::SUCCESS
@document = Mindee::Document.new(prediction_class, http_response['document'])
end
@job = Mindee::Job.new(http_response['job']) if http_response.key?('job')
# NOTE(review): repeats the assignment made at the top of the method; presumably the
# second removed diff line -- confirm against the checked-out file.
@api_request = Mindee::ApiRequest.new(http_response['api_request']) if http_response.key?('api_request')
end
end
end
12 changes: 12 additions & 0 deletions lib/mindee/parsing/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,25 @@ class Document
attr_reader :name
# @return [String] Mindee ID of the document
attr_reader :id
# @return [Mindee::Ocr::Ocr, nil]
attr_reader :ocr

# Load the OCR results of a document, when the response carries any.
#
# The OCR is only built when the 'ocr' entry exists and holds a non-nil
# 'mvision-v1' entry; in every other case there is nothing to load.
# @param http_response [Hash] the 'document' section of the API response
# @return [Mindee::Ocr::Ocr, nil] the OCR results, or nil when absent
def self.load_ocr(http_response)
  ocr_prediction = http_response.fetch('ocr', nil)
  return nil if ocr_prediction.nil? || ocr_prediction.fetch('mvision-v1', nil).nil?

  # `Ocr` is a class, so it must be instantiated with `.new`
  # (a bare `Ocr(...)` is a call to a method that does not exist).
  Mindee::Ocr::Ocr.new(ocr_prediction)
end

# Build the document from its section of the API response: identifier and
# name are read directly, the inference and the OCR are parsed into objects.
# @param prediction_class [Class<Mindee::Prediction::Prediction>]
# @param http_response [Hash]
def initialize(prediction_class, http_response)
  @id, @name = http_response.values_at('id', 'name')
  inference_prediction = http_response['inference']
  @inference = Mindee::Inference.new(prediction_class, inference_prediction)
  @ocr = self.class.load_ocr(http_response)
end

def to_s
Expand Down
1 change: 1 addition & 0 deletions lib/mindee/parsing/prediction/common_fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
require_relative 'common_fields/position'
require_relative 'common_fields/tax'
require_relative 'common_fields/text'
require_relative 'common_fields/ocr'
2 changes: 1 addition & 1 deletion lib/mindee/parsing/prediction/common_fields/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
module Mindee
# Base Field object, upon which fields and feature fields are built
class AbstractField
# @return [Array<Array<Float>>]
# @return [Mindee::Geometry::Quadrilateral]
attr_reader :bounding_box
# @return [Mindee::Geometry::Polygon]
attr_reader :polygon
Expand Down
183 changes: 183 additions & 0 deletions lib/mindee/parsing/prediction/common_fields/ocr.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# frozen_string_literal: true

module Mindee
module Ocr
# One word read by the OCR, together with its position.
class OcrWord
  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_accessor :confidence
  # Text of the word.
  # @return [String]
  attr_reader :text
  # Polygon built from the word's 'polygon' prediction.
  # @return [Mindee::Geometry::Polygon]
  attr_reader :polygon
  # Bounding box of the polygon; left unset (nil) when the polygon is nil or empty.
  # @return [Mindee::Geometry::Quadrilateral, nil]
  attr_reader :bounding_box

  # @param prediction [Hash] word prediction read through its 'text', 'confidence' and 'polygon' keys
  def initialize(prediction)
    @text, @confidence = prediction.values_at('text', 'confidence')
    @polygon = Geometry.polygon_from_prediction(prediction['polygon'])
    return if @polygon.nil? || @polygon.empty?

    @bounding_box = Geometry.get_bounding_box(@polygon)
  end

  # @return [String] the word's text, or an empty string when it has none
  def to_s
    text.to_s
  end
end

# A list of words which are on the same line.
class OcrLine < Array
  # Build a line either from raw word predictions or from existing words.
  # `prediction` takes precedence when both are given; with neither, the line is empty.
  # @param prediction [Array<Hash>, nil] word predictions, each one turned into an OcrWord
  # @param from_array [Array<OcrWord>, nil] existing words copied into the line
  def initialize(prediction = nil, from_array = nil)
    if !prediction.nil?
      super(prediction.map { |word_prediction| OcrWord.new(word_prediction) })
    elsif !from_array.nil?
      super(from_array)
    else
      super()
    end
  end

  # Sort the words on the line from left to right.
  # The receiver is left untouched.
  # @return [OcrLine] a new line, ordered by the lowest X coordinate of each word's polygon
  def sort_on_x
    # sort_by computes each word's key once instead of once per comparison.
    OcrLine.new(nil, sort_by { |word| Geometry.get_min_max_x(word.polygon).min })
  end

  # @return [String] the text of every word, separated by single spaces
  def to_s
    map(&:to_s).join(' ')
  end
end

# OCR extraction for a single page.
class OcrPage
  # All the words on the page, in semi-random order.
  # @return [Array<OcrWord>]
  attr_reader :all_words
  # The lines computed so far; stays empty until {#all_lines} has been called.
  # @return [Array<OcrLine>]
  attr_reader :lines

  # @param prediction [Hash] page prediction; its 'all_words' entry lists the word predictions
  def initialize(prediction)
    @lines = []
    @all_words = prediction['all_words'].map { |word_prediction| OcrWord.new(word_prediction) }
  end

  # All the words on the page, ordered in lines.
  # The lines are computed on the first call and reused afterwards (as long as they are not empty).
  # @return [Array<OcrLine>]
  def all_lines
    @lines = to_lines if @lines.empty?
    @lines
  end

  # @return [String] the non-blank lines of the page, one per row
  def to_s
    all_lines.map(&:to_s).reject { |text| text.strip.empty? }.join("\n").strip
  end

  private

  # Order all the words on the page into lines.
  #
  # Words are visited from top to bottom. The first word not yet assigned to a
  # line starts a new one, and gathers every remaining word on the same line.
  # @return [Array<OcrLine>]
  def to_lines
    # make sure words are sorted from top to bottom
    sorted_words = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }
    # used[i] tells whether sorted_words[i] already belongs to a line
    used = Array.new(sorted_words.size, false)

    lines = []
    sorted_words.each_index do |anchor_idx|
      lines.push(collect_line(sorted_words, anchor_idx, used)) unless used[anchor_idx]
    end
    lines
  end

  # Build the line started by the word at `anchor_idx`.
  #
  # Every following unused word is compared to the anchor word itself
  # (not to the previous word of the line), and marked as used once taken.
  # @param sorted_words [Array<OcrWord>] words sorted from top to bottom
  # @param anchor_idx [Integer] index of the word starting the line
  # @param used [Array<Boolean>] per-word usage flags, updated in place
  # @return [OcrLine] the line, sorted from left to right
  def collect_line(sorted_words, anchor_idx, used)
    anchor = sorted_words[anchor_idx]
    used[anchor_idx] = true
    words = [anchor]

    ((anchor_idx + 1)...sorted_words.size).each do |idx|
      next if used[idx] || !words_on_same_line?(anchor, sorted_words[idx])

      used[idx] = true
      words.push(sorted_words[idx])
    end
    OcrLine.new(nil, words).sort_on_x
  end

  # Determine if two words are on the same line: true when the centroid of
  # either word falls within the Y range of the other word's polygon.
  # @param current_word [Mindee::Ocr::OcrWord]
  # @param next_word [Mindee::Ocr::OcrWord]
  # @return [Boolean]
  def words_on_same_line?(current_word, next_word)
    current_in_next = current_word.polygon.point_in_y?(next_word.polygon.centroid)
    next_in_current = next_word.polygon.point_in_y?(current_word.polygon.centroid)
    current_in_next || next_in_current
  end
end

# Mindee Vision V1 results.
class MVisionV1
  # List of pages.
  # @return [Array<OcrPage>]
  attr_reader :pages

  # @param prediction [Hash] 'mvision-v1' prediction; its 'pages' entry lists the page predictions
  def initialize(prediction)
    @pages = prediction['pages'].map { |page_prediction| OcrPage.new(page_prediction) }
  end

  # @return [String] the text of every page, each one starting on a new row,
  #   with surrounding whitespace removed
  def to_s
    text = pages.each_with_object(String.new) do |page, buffer|
      buffer << "\n" << page.to_s
    end
    text.strip
  end
end

# OCR results covering the entire document.
class Ocr
  # Mindee Vision v1 results.
  # @return [Mindee::Ocr::MVisionV1]
  attr_reader :mvision_v1

  # @param prediction [Hash] OCR prediction; its 'mvision-v1' entry is parsed into an MVisionV1
  def initialize(prediction)
    mvision_prediction = prediction['mvision-v1']
    @mvision_v1 = MVisionV1.new(mvision_prediction)
  end

  # @return [String] string form of the Mindee Vision v1 results
  def to_s
    mvision_v1.to_s
  end
end
end
end
20 changes: 20 additions & 0 deletions spec/ocr_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# frozen_string_literal: true

require 'json'
require 'mindee'

require_relative './data'

DIR_OCR = File.join(DATA_DIR, 'ocr')

describe Mindee::Ocr::Ocr do
  context 'An OCR extraction' do
    # Complete API response fixture, including the `ocr` section of the document.
    api_response = load_json(DIR_OCR, 'complete_with_ocr.json')

    it 'should extract ocr data from a document' do
      reference_text = read_file(DIR_OCR, 'ocr.txt')
      extraction = described_class.new(api_response['document']['ocr'])
      first_page = extraction.mvision_v1.pages.first

      # The whole extraction and its first page must both render the reference text.
      expect(extraction.to_s).to eq(reference_text)
      expect(first_page.to_s).to eq(reference_text)
    end
  end
end

0 comments on commit fb55a57

Please sign in to comment.