[Pycodeocr-main] SF.net SVN: pycodeocr:[65] branches/redesign/Recognition
Status: Beta
Brought to you by:
drtrigon
From: <la...@us...> - 2014-08-24 10:19:39
|
Revision: 65 http://sourceforge.net/p/pycodeocr/code/65 Author: laserb Date: 2014-08-24 10:19:30 +0000 (Sun, 24 Aug 2014) Log Message: ----------- add more documentation Modified Paths: -------------- branches/redesign/Recognition/Barcode.py branches/redesign/Recognition/ESR.py branches/redesign/Recognition/ImageRecognition.py branches/redesign/Recognition/PDF417.py Modified: branches/redesign/Recognition/Barcode.py =================================================================== --- branches/redesign/Recognition/Barcode.py 2014-08-24 10:18:01 UTC (rev 64) +++ branches/redesign/Recognition/Barcode.py 2014-08-24 10:19:30 UTC (rev 65) @@ -9,7 +9,7 @@ check = not (data['type'] == "unknown") if check: check = check and ( int(data['crc']) == 0 ) # CRC error check? - check = check and (float(data['error']) < 0.15) # recognizion errors? + check = check and (float(data['error']) < 0.15) # recognition errors? print 'type:' +" "+ data['type'] print 'chars:' +" "+ data['chars'] print 'crc:' +" "+ data['crc'] @@ -17,7 +17,7 @@ data = data['code'] else: data = "type: " + data['type'] - return check + return (check, data) def setImage(self,image): self.image = image Modified: branches/redesign/Recognition/ESR.py =================================================================== --- branches/redesign/Recognition/ESR.py 2014-08-24 10:18:01 UTC (rev 64) +++ branches/redesign/Recognition/ESR.py 2014-08-24 10:19:30 UTC (rev 65) @@ -1,62 +1,104 @@ # -*- coding: utf8 -*- +## +# @file ESR.py +# @package ImageRecognition +# @authors drtrigon, laserb +# @version 2.0b +# @brief Recognize codeline from ESR +# +# $Id: $ +# +# +# @section Description +# +# Extract information from ESR. +# +# @section Dependencies +# +# @li Python modules needed are: os, re. 
+# @li Modules: utils, Recognition +# from Recognition import ImageRecognition import utils import re, os +## Extract information from ESR class ESR(ImageRecognition.ImageRecognitionBase): + ## init def __init__(self): super(ESR,self).__init__("ESR") self.temp_name = "image_object_temp_file" self.valid_code_len = [ 53, 43, 42, 32 ] # valid code lenghts - + ## @overwrite verification def verify(self): + # correct for common char recognition errors self.recognitionData.setContent(self.char_correction(self.recognitionData.getContent())) + # get data data = self.recognitionData.getContent() + + # check for invalid characters check = (not "?" in data) or (not "!" in data) # any unrecognized char in code? check = check and ( len(data) in self.valid_code_len ) # correct code len? if not check: return (False, "Invalid length or invalid characters") + # extract amount, reference and account from codeline tmp = data[:-1].split(">") amount = tmp[0] tmp = tmp[1].split("+ ") reference = tmp[0] account = tmp[1] + # do checksum for amount, reference and account if not self.checksum(amount): return (False, "Checksum for amount failed.") if not self.checksum(reference): return (False, "Checksum for reference failed.") if not self.checksum(account): return (False, "Checksum for account failed.") + + # return true if all checks are positive return (True, "") # thanks to http://www.hosang.ch/modulo10.aspx # much more details http://www.sic.ch/de/dl_tkicch_dta.pdf ( page 51 ) # @see http://www.bundesbank.de/download/zahlungsverkehr/zv_pz201012.pdf # @see http://www.bundesbank.de/zahlungsverkehr/zahlungsverkehr_pruefziffernberechnung.php + # @return True if checksum is valid, false otherwise def checksum(self, number): tabelle = [0,9,4,6,8,2,7,1,3,5] uebertrag = 0 + # iterate over each character for i in range(len(number)-1): uebertrag = tabelle[(uebertrag + int(number[i]) )%10] uebertrag = (10-uebertrag)%10 + + # check if last digit equals checksum return str(uebertrag) == number[-1] + 
# @overwrite recognizeImage def recognizeImage(self, image, rotate = None): super(ESR,self).recognizeImage(image) + + # if no rotation is set, use default: [0, 90, 90, 90] if rotate == None: rotate = self.rotate + + # try all rotation and test if data is valid for angle in rotate: + # convert image self.recognitionData.convert({"-depth":str(self.depth),"-rotate":str(angle)}, "convert: unable to open image") + + # do recognition with tesseract self.recognitionData.getImage().save(self.temp_name + ".png") runExternal = utils.RunExternal("tesseract %s %s" % (self.temp_name + ".png",self.temp_name+"tesseract"),error_msg="Couldn't find text") runExternal() + # Simple workaround: The program simply continuous if no text is found at all # and therefore no txt-file is generated. if not os.path.exists(self.temp_name+"tesseract.txt"): @@ -67,10 +109,16 @@ df.close() os.remove(self.temp_name + "tesseract.txt") # clean-up os.remove(self.temp_name + ".png") # clean-up + + # set content self.recognitionData.setContent(data) + + # check if data is valid (check, msg) = self.verify() if check: break + + # return the data return self.recognitionData.getContent() ## Character correction after recogition (on basis that there should be numbers and few special chars). Modified: branches/redesign/Recognition/ImageRecognition.py =================================================================== --- branches/redesign/Recognition/ImageRecognition.py 2014-08-24 10:18:01 UTC (rev 64) +++ branches/redesign/Recognition/ImageRecognition.py 2014-08-24 10:19:30 UTC (rev 65) @@ -1,70 +1,104 @@ # -*- coding: utf8 -*- ## -# @file File.py -# @package Source -# @extends SourceBase +# @file ImageRecognition.py +# @package ImageRecognition # @authors drtrigon, laserb # @version 2.0b -# @brief Base class for sources. +# @brief Base class for image recognition. # # $Id: $ # # # @section Description # -# Get an image from file. +# Extract information from image. 
# # @section Dependencies # +# @li Python modules needed are: os, gtk, PIL. +# @li Modules: utils # import os, gtk from PIL import Image import utils +## Base class for image recognition. This class is not useful by itself. +# Extend this class for a specific recognition technique. class ImageRecognitionBase(object): + ## init def __init__(self, name): self.name = name self.depth = 8 self.rotate = [0,90,90,90] + ## Verify if data is valid. + # @pre image acquired and recognized + # @post information extracted + # @return (boolean, data) True if data is valid, false otherwise + # if data is valid return data, otherwise return error message def verify(self): return (True, "") + ## get image and associated data + # @pre image recognized + # @return image and asscociated data def getRecognitionData(self): return self.recognitionData + ## recognize image + # @pre image acquired + # @param image + # @post information is extracted + # @return extracted information def recognizeImage(self,image): recognitionData = RecognitionData(image) self.recognitionData = recognitionData return self.recognitionData.getContent() + ## get name + # @return name def getName(self): return self.name + ## get rotate + # @return rotate def getRotate(self): return self.rotate + ## set rotate + # @param rotation angle def setRotate(self, rotate): self.rotate = rotate + ## get depth + # @return depth def getDepth(self): return self.depth + ## set depth + # @param depth def setDepth(self, depth): self.depth = depth - +## store for image and associated data class RecognitionData(object): + ## init def __init__(self, image): self.image = image self.temp_name = "image_object_temp_file" self.content = "" + ## get image + # @return image def getImage(self): return self.image + ## convert initial image to prepare it for recognition + # @pre image acquired + # @param convertion options, error message + # @post image optimized for recognition def convert(self,options,msg): path = os.path.abspath(".") orig 
= os.path.join(path,self.temp_name + ".png") @@ -80,12 +114,21 @@ os.remove(orig) # clean-up os.remove(new) # clean-up + ## set information content of image + # @pre image recognized + # @param information from image + # @post information set def setContent(self, content): self.content = content + ## get information associated with image + # @return information content def getContent(self): return self.content + ## save content to clipboard + # @pre information set + # @post information saved in clipboard def saveClipboard(self): # get the clipboard clipboard = gtk.clipboard_get() @@ -94,21 +137,35 @@ # make our data available to other applications clipboard.store() - +## Run the recognition and verify the information class RunRecognition(object): + ## init def __init__(self, source, data, imageRecognition): self.source = source self.data = data self.imageRecognition = imageRecognition + ## call + # @return (boolean, data), True if data is valid, False otherwise + # data if the acquired data is image, error-message otherwise def __call__(self): image = self.source.getImage(self.data) return self.recognize(image) + ## recognize image + # @return (boolean, data), True if data is valid, False otherwise + # data if the acquired data is image, error message otherwise def recognize(self, image): + # do recognition self.imageRecognition.recognizeImage(image) + + # verify data (check, msg) = self.imageRecognition.verify() + + # if data is valid, save to clipboard and return data if check: self.imageRecognition.getRecognitionData().saveClipboard() return (True, self.imageRecognition.getRecognitionData().getContent()) + + # if data is invalid return error message return (False, msg) \ No newline at end of file Modified: branches/redesign/Recognition/PDF417.py =================================================================== --- branches/redesign/Recognition/PDF417.py 2014-08-24 10:18:01 UTC (rev 64) +++ branches/redesign/Recognition/PDF417.py 2014-08-24 10:19:30 UTC (rev 
65)
@@ -5,7 +5,7 @@
         super(PDF417,self).__init("PDF417")
 
     def verify(self):
-        return True
+        return (True, "")
 
     def setImage(self,image):
         self.image = image

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|