Skip to content

Commit

Permalink
sated rubocop
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastianMindee committed Jun 16, 2023
1 parent bd959dc commit 0ed625c
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 43 deletions.
5 changes: 4 additions & 1 deletion lib/mindee/parsing/api_response.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,10 @@ class ApiResponse
# @param http_response [Hash]
def initialize(prediction_class, http_response)
@api_request = Mindee::ApiRequest.new(http_response['api_request']) if http_response.key?('api_request')
if http_response.key?('document') && (!http_response.key?('job') || http_response['job']['status'] == 'completed') && @api_request.status == RequestStatus::SUCCESS
if http_response.key?('document') &&
(!http_response.key?('job') ||
http_response['job']['status'] == 'completed') &&
@api_request.status == RequestStatus::SUCCESS
@document = Mindee::Document.new(prediction_class, http_response['document'])
end
@job = Mindee::Job.new(http_response['job']) if http_response.key?('job')
Expand Down
4 changes: 2 additions & 2 deletions lib/mindee/parsing/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ class Document
# Builds the OCR object from the 'ocr' entry of a response hash, when one is usable.
# @param http_response [Hash] hash that may contain an 'ocr' entry.
# @return [Mindee::Ocr::Ocr, nil] nil when 'ocr' or its 'mvision-v1' entry is absent or nil.
def self.load_ocr(http_response)
  ocr_prediction = http_response.fetch('ocr', nil)
  return nil if ocr_prediction.nil? || ocr_prediction.fetch('mvision-v1', nil).nil?

  # `Ocr(...)` would be parsed as a call to a method named `Ocr`, so instantiate the class explicitly.
  Mindee::Ocr::Ocr.new(ocr_prediction)
end
Expand Down
77 changes: 40 additions & 37 deletions lib/mindee/parsing/prediction/common_fields/ocr.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ class OcrLine < Array
# @param prediction [Hash, nil]
# @param from_array [Array, nil]
# @param page_id [Integer, nil]
def initialize(prediction=nil, from_array=nil)
if !prediction.nil?
def initialize(prediction = nil, from_array = nil)
if !prediction.nil?
super(prediction.map { |word_prediction| OcrWord.new(word_prediction) })
elsif !from_array.nil?
super(from_array)
Expand All @@ -42,8 +42,10 @@ def initialize(prediction=nil, from_array=nil)

# Sort the words on the line from left to right.
# The receiver is left untouched; the ordering key is
# `Geometry.get_min_max_x(word.polygon).min` for each word.
# @return [OcrLine] a new line holding the same words in sorted order.
def sort_on_x
  # sort_by evaluates the key once per word rather than on every comparison.
  ordered = sort_by { |word| Geometry.get_min_max_x(word.polygon).min }
  OcrLine.new(nil, ordered)
end

def to_s
Expand All @@ -61,11 +63,10 @@ class OcrPage

# Builds a page from its raw prediction.
# @param prediction [Hash] must hold an 'all_words' collection of word predictions.
def initialize(prediction)
  # Starts empty; this constructor only collects the words.
  @lines = []
  # One OcrWord per entry, kept in the order the prediction lists them.
  @all_words = prediction['all_words'].map { |word_prediction| OcrWord.new(word_prediction) }
end

# All the words on the page, ordered in lines.
Expand All @@ -81,13 +82,36 @@ def to_s

out_str = String.new
lines.map do |line|
out_str << "#{line.to_s}\n" unless line.to_s.strip.empty?
out_str << "#{line}\n" unless line.to_s.strip.empty?
end
out_str.strip
end

private

# Helper that makes one pass over the words and gathers a single line.
# The first word whose index is not yet in `indexes` becomes the reference when
# `current` is nil; every later unused word that `words_on_same_line?` accepts
# against the reference joins the line. Indexes of gathered words are appended
# to `indexes`, and a non-empty line is appended to `lines` after `sort_on_x`.
# @param sorted_words [Array<OcrWord>]
# @param current [OcrWord, nil] reference word, or nil to pick the first unused word.
# @param indexes [Array<Integer>] positions in `sorted_words` already used; mutated.
# @param lines [Array<OcrLine>] accumulator for the finished lines; mutated.
def parse_one(sorted_words, current, indexes, lines)
  line = OcrLine.new([])
  sorted_words.each_with_index do |word, idx|
    next if indexes.include?(idx)
    next unless current.nil? || words_on_same_line?(current, word)

    # The first unused word becomes the reference for the rest of this pass.
    current ||= word
    line.push(word)
    indexes.push(idx)
  end
  lines.push(line.sort_on_x) if line.any?
end

# Order all the words on the page into lines.
# @param current [OcrWord, nil]
# @param indexes [Array<Integer>]
Expand All @@ -99,28 +123,10 @@ def to_lines
lines = []

# make sure words are sorted from top to bottom
@all_words = all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }
@all_words.each do
line = OcrLine.new([])
@all_words.each_with_index do |word, idx|
if indexes.include?(idx)
next
elsif current.nil?
current = word
indexes.push(idx)
line = OcrLine.new([])
line.push(word)
else
if words_on_same_line?(current, word)
line.push(word)
indexes.push(idx)
end
end
end
all_words = @all_words.sort_by { |word| Geometry.get_min_max_y(word.polygon).min }
all_words.each do
parse_one(all_words, current, indexes, lines)
current = nil
if line.any?
lines.push(line.sort_on_x)
end
end
lines
end
Expand All @@ -134,7 +140,6 @@ def words_on_same_line?(current_word, next_word)
next_in_current = next_word.polygon.point_in_y?(current_word.polygon.centroid)
current_in_next || next_in_current
end

end

# Mindee Vision V1.
Expand All @@ -144,12 +149,10 @@ class MVisionV1
attr_reader :pages

# Builds the container from its raw prediction.
# @param prediction [Hash] must hold a 'pages' collection of page predictions.
def initialize(prediction)
  # map (not each) is required here: each would hand back the raw hashes instead of OcrPage objects.
  @pages = prediction['pages'].map { |page_prediction| OcrPage.new(page_prediction) }
end

def to_s
Expand All @@ -169,7 +172,7 @@ class Ocr
attr_reader :mvision_v1

# Wraps the 'mvision-v1' entry of an OCR prediction.
# @param prediction [Hash] OCR hash; its 'mvision-v1' value is handed to MVisionV1.
def initialize(prediction)
  @mvision_v1 = MVisionV1.new(prediction['mvision-v1'])
end

def to_s
Expand Down
6 changes: 3 additions & 3 deletions spec/ocr_spec.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# frozen_string_literal: true

require 'json'
require 'mindee'

Expand All @@ -11,19 +12,18 @@
json_data = load_json(DIR_OCR, 'complete_with_ocr.json')
it 'should extract ocr data from a document' do
expected_text = read_file(DIR_OCR, 'ocr.txt')
ocr = Mindee::Ocr::Ocr.new(json_data["document"]["ocr"])
ocr = Mindee::Ocr::Ocr.new(json_data['document']['ocr'])
expect(ocr.to_s).to eq(expected_text)
expect(ocr.mvision_v1.pages[0].to_s).to eq(expected_text)
end
end
end


# def test_response
# json_data = json.load(open("./data/ocr/complete_with_ocr.json"))
# with open("./tests/data/ocr/ocr.txt") as file_handle:
# expected_text = file_handle.read()
# ocr = Ocr(json_data["document"]["ocr"])
# assert str(ocr) == expected_text
# assert str(ocr.mvision_v1.pages[0]) == expected_text
# end
# end

0 comments on commit 0ed625c

Please sign in to comment.