pyWinAuto: c:\.projects\py_pywinauto\pywinauto\findbestmatch.py
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021"Module to find the closest match of a string in a list"
0022
0023__revision__ = "$Revision: 607 $"
0024
0025import re
0026import difflib
0027
0028import fuzzydict
0029
0030
0031
0032
0033
0034
0035
0036import sets
0037
# Minimum (scaled) similarity ratio that a control name has to reach before
# UniqueDict.FindBestMatches and find_best_control_matches treat it as a match.
find_best_control_match_cutoff = 0.6
0039
0040
class MatchError(IndexError):
    """A suitable match could not be found.

    Instances carry two attributes:

    * **tofind** The text that was being searched for.
    * **items** The candidate texts that were searched (always a
      container, never ``None``).
    """

    def __init__(self, items = None, tofind = ''):
        """Store the search details and initialise the parent with the message

        * **items** The candidates that were searched; ``None`` is stored
          as an empty list.
        * **tofind** The text that could not be matched.
        """
        self.tofind = tofind
        self.items = items
        if self.items is None:
            self.items = []

        IndexError.__init__(
            self,
            "Could not find '%s' in '%s'"% (tofind, self.items))
0052
0053
# Module-wide memo of similarity ratios that have already been calculated.
_cache = {}


def _get_match_ratios(texts, match_against):
    """Get the match ratio of how each item in texts compared to match_against

    * **texts** An iterable of the candidate strings.
    * **match_against** The string every candidate is compared with.

    Returns a tuple ``(ratios, best_ratio, best_text)``:

    * ``ratios`` maps each candidate to its ``difflib.SequenceMatcher``
      ratio (0 to 1),
    * ``best_ratio`` is the highest ratio found (``0`` if no candidate
      scored above zero),
    * ``best_text`` is the first candidate that reached ``best_ratio``
      (``''`` if no candidate scored above zero).
    """
    ratio_calc = difflib.SequenceMatcher()
    ratio_calc.set_seq1(match_against)

    ratios = {}
    best_ratio = 0
    best_text = ''

    for text in texts:
        # SequenceMatcher.ratio() can depend on the order of its arguments,
        # so the key keeps the order that was used for the calculation.
        # The leading tag keeps these plain ratios apart from the scaled
        # values that UniqueDict.FindBestMatches stores in the same dict.
        cache_key = ("ratio", match_against, text)

        if cache_key in _cache:
            ratio = _cache[cache_key]
        else:
            ratio_calc.set_seq2(text)
            ratio = ratio_calc.ratio()
            _cache[cache_key] = ratio

        ratios[text] = ratio

        # strictly greater: on a tie the earliest candidate is kept
        if ratio > best_ratio:
            best_ratio = ratio
            best_text = text

    return ratios, best_ratio, best_text
0103
0104
0105
0106
0107
def find_best_match(search_text, item_texts, items, limit_ratio = .5):
    """Return the item that best matches the search_text

    * **search_text** The text to search for
    * **item_texts** The list of texts to search through
    * **items** The list of items corresponding (1 to 1)
      to the list of texts to search through.
    * **limit_ratio** How well the text has to match the best match.
      If the best match matches lower than this then it is not
      considered a match and a MatchError is raised, (default = .5)

    Anything from the first tab onwards is ignored in search_text and in
    each of the item_texts before they are compared.

    Raises MatchError if no text matches well enough.
    """
    search_text = _cut_at_tab(search_text)

    # UniqueDict keeps items with identical texts apart by numbering them
    text_item_map = UniqueDict()
    for text, item in zip(item_texts, items):
        text_item_map[_cut_at_tab(text)] = item

    candidate_texts = list(text_item_map)
    _, best_ratio, best_text = _get_match_ratios(candidate_texts, search_text)

    # best_text stays '' when no candidate scored above zero; report that as
    # a failed match rather than letting the lookup below raise a KeyError
    if best_ratio < limit_ratio or best_text not in text_item_map:
        raise MatchError(items = candidate_texts, tofind = search_text)

    return text_item_map[best_text]
0135
0136
0137
0138
0139
0140
# Written as u"\\..." instead of ur"\..." - the patterns are identical but the
# ur prefix is a syntax error on Python 3.
_after_tab = re.compile(u"\\t.*", re.UNICODE)
_non_word_chars = re.compile(u"\\W", re.UNICODE)


def _cut_at_tab(text):
    """Return text with the first tab and whatever follows it removed

    '.' does not match a newline, so only the rest of the line that holds
    the tab is dropped; any following lines are kept.
    """
    return _after_tab.sub("", text)


def _clean_non_chars(text):
    "Remove non word characters (anything the regex class \\W matches)"
    return _non_word_chars.sub("", text)
0156
0157
def IsAboveOrToLeft(ref_control, other_ctrl):
    """Return true if the other_ctrl is above or to the left of ref_control

    * **ref_control** The control that the position is judged against
    * **other_ctrl** The control whose position is being checked

    Both arguments need a ``Rectangle()`` method returning an object with
    ``left``, ``top``, ``right`` and ``bottom`` attributes.

    The result is False when other_ctrl starts at or beyond the right edge
    of ref_control, or at or below its bottom edge, and True otherwise.
    """
    text_r = other_ctrl.Rectangle()
    ctrl_r = ref_control.Rectangle()

    return text_r.left < ctrl_r.right and text_r.top < ctrl_r.bottom
0172
0173
0174
# A text control has to be closer than this to lend its text to another
# control in GetNonTextControlName.
distance_cuttoff = 999


def GetNonTextControlName(ctrl, controls):
    """return the name for this control by finding the closest
    text control above and to its left

    * **ctrl** The control that needs a name
    * **controls** The list of all the controls; it must contain ctrl
      (``controls.index(ctrl)`` raises ValueError otherwise)

    Returns a list of one or two names. Each name is the text of another
    control followed by the friendly class name of ctrl:

    * if the control just before ctrl in the list is a visible "Static"
      with text that is above or to the left of ctrl, a name built from it
    * always last, a name built from the closest visible control with text
      that is above or to the left of ctrl, or ``''`` if there is none
      closer than distance_cuttoff
    """
    names = []
    ctrl_class = ctrl.FriendlyClassName()

    ctrl_index = controls.index(ctrl)

    # a label very often comes directly before the control it describes
    if ctrl_index != 0:
        prev_ctrl = controls[ctrl_index - 1]

        if (prev_ctrl.FriendlyClassName() == "Static" and
                prev_ctrl.IsVisible() and
                prev_ctrl.WindowText() and
                IsAboveOrToLeft(ctrl, prev_ctrl)):
            names.append(prev_ctrl.WindowText() + ctrl_class)

    # the rectangle of ctrl is the same for every comparison below
    ctrl_r = ctrl.Rectangle()

    best_name = ''
    closest = distance_cuttoff

    for text_ctrl in controls:
        # only visible controls that have some text can supply a name
        if not (text_ctrl.IsVisible() and text_ctrl.WindowText()):
            continue

        text_r = text_ctrl.Rectangle()

        # skip controls that start to the right of, or below, ctrl
        if text_r.left >= ctrl_r.right or text_r.top >= ctrl_r.bottom:
            continue

        # distance from the top left of ctrl to the bottom left of the text
        # (text above) or to the top right of the text (text to the left) -
        # whichever is the smaller
        distance = min(
            abs(text_r.left - ctrl_r.left) + abs(text_r.bottom - ctrl_r.top),
            abs(text_r.right - ctrl_r.left) + abs(text_r.top - ctrl_r.top))

        if distance < closest:
            closest = distance
            best_name = text_ctrl.WindowText() + ctrl_class

    names.append(best_name)

    return names
0261
0262
0263
def get_control_names(control, allcontrols):
    """Returns a set of names for this control

    * **control** The control to build the names for
    * **allcontrols** All the controls of the dialog; only used (passed to
      GetNonTextControlName) when control has no text of its own

    The names are:

    * the friendly class name of the control
    * if the control has text: that text, and the text followed by the
      friendly class name
    * otherwise: the names that GetNonTextControlName builds from nearby
      text controls

    Empty names are left out, and duplicates collapse because the result
    is a set.
    """
    class_name = control.FriendlyClassName()
    names = [class_name]

    window_text = control.WindowText()
    if window_text:
        names.append(window_text)
        names.append(window_text + class_name)
    else:
        # no text of its own - borrow the text of nearby controls
        names.extend(GetNonTextControlName(control, allcontrols))

    # built-in set instead of the deprecated sets.Set (the sets module was
    # removed in Python 3)
    unique_names = set(names)

    # an empty name cannot identify a control; when several controls had
    # one, UniqueDict turned them into the misleading keys '0', '1', '2'...
    unique_names.discard('')

    return unique_names
0292
0293
0294
0295
0296
0297
class UniqueDict(dict):
    "A dictionary subclass that handles making its keys unique"

    def __setitem__(self, text, item):
        """Set an item of the dictionary without overwriting an existing key

        If text is not in the dictionary yet the item is stored under it.
        Otherwise the item is stored under text followed by the first
        number (starting at 2) that gives an unused key, and the item that
        is already stored under text is made available as text + '0' and
        text + '1' as well.
        """
        if text in self:
            # find the first free numbered key: text2, text3, ...
            unique_text = text
            counter = 2
            while unique_text in self:
                unique_text = text + str(counter)
                counter += 1

            # the first time a key is duplicated, also file the original
            # item under text0 and text1
            if text + '0' not in self:
                dict.__setitem__(self, text + '0', self[text])
                dict.__setitem__(self, text + '1', self[text])

            text = unique_text

        dict.__setitem__(self, text, item)

    def FindBestMatches(
        self,
        search_text,
        clean = False,
        ignore_case = False):

        """Return the best matches for search_text in the items

        * **search_text** the text to look for
        * **clean** whether to clean non text characters out of the strings
        * **ignore_case** compare strings case insensitively

        Returns a tuple ``(best_ratio, best_texts)``. best_ratio is the
        highest match ratio found, multiplied by .9 for each of clean and
        ignore_case that is set (so less exact comparisons score lower).
        best_texts lists the keys that share that ratio. When no key
        reaches find_best_control_match_cutoff best_ratio is 0.
        """
        ratio_calc = difflib.SequenceMatcher()

        if ignore_case:
            search_text = search_text.lower()

        ratio_calc.set_seq1(search_text)

        best_ratio = 0
        best_texts = []

        ratio_offset = 1
        if clean:
            ratio_offset *= .9

        if ignore_case:
            ratio_offset *= .9

        cutoff = find_best_control_match_cutoff

        for original_text in self:
            # compare a transformed copy, but report the original key
            text = original_text

            if clean:
                text = _clean_non_chars(text)

            if ignore_case:
                text = text.lower()

            # The stored value is scaled by ratio_offset, so the offset has
            # to be part of the key; otherwise a value calculated for one
            # combination of clean/ignore_case is handed to another. The
            # order of the two texts is kept because SequenceMatcher
            # results can depend on it.
            cache_key = (text, search_text, ratio_offset)

            if cache_key in _cache:
                ratio = _cache[cache_key]
            else:
                ratio_calc.set_seq2(text)

                # real_quick_ratio() and quick_ratio() are cheap upper
                # bounds for ratio(); the expensive calculation is only
                # done when they say the cutoff could still be reached
                ratio = ratio_calc.real_quick_ratio() * ratio_offset

                if ratio >= cutoff:
                    ratio = ratio_calc.quick_ratio() * ratio_offset

                    if ratio >= cutoff:
                        ratio = ratio_calc.ratio() * ratio_offset

                _cache[cache_key] = ratio

            # a new best has to reach the cutoff too; equal scores are
            # collected so that the caller gets every equally good key
            if ratio > best_ratio and ratio >= cutoff:
                best_ratio = ratio
                best_texts = [original_text]

            elif ratio == best_ratio:
                best_texts.append(original_text)

        return best_ratio, best_texts
0415
0416
0417
def build_unique_dict(controls):
    """Build the disambiguated list of controls

    Separated out to a different function so that we can get
    the control identifiers for printing.

    * **controls** The controls to build the names for

    Returns a UniqueDict that maps every name get_control_names produces
    for a control to that control. Names shared by several controls are
    kept apart by UniqueDict.
    """
    name_control_map = UniqueDict()

    for ctrl in controls:
        # the whole list is passed so that controls without text can be
        # named after their neighbours
        for name in get_control_names(ctrl, controls):
            name_control_map[name] = ctrl

    return name_control_map
0436
0437
0438
def find_best_control_matches(search_text, controls):
    """Returns the controls that are the best match to search_text

    This is slightly different from find_best_match in that it builds
    up the list of text items to search through using information
    from each control. So for example if there is an OK, Button
    then the following are all added to the search list:
    "OK", "Button", "OKButton"

    But if there is a ListView (which do not have visible 'text')
    then it will just add "ListView".

    * **search_text** The text to look for (converted to text first)
    * **controls** The controls to search through

    Returns a list with the control for each of the equally good best
    matching names. Raises MatchError if no name matches well enough
    (find_best_control_match_cutoff).
    """
    name_control_map = build_unique_dict(controls)

    # unicode() only exists on Python 2; str is the text type on Python 3
    try:
        text_type = unicode
    except NameError:
        text_type = str
    search_text = text_type(search_text)

    # Try the exact comparison first and then the progressively less exact
    # ones. A later comparison only replaces the result when it is strictly
    # better, so on a tie the more exact comparison wins.
    best_ratio = None
    best_texts = []
    for clean, ignore_case in (
            (False, False), (False, True), (True, False), (True, True)):
        ratio, texts = name_control_map.FindBestMatches(
            search_text, clean = clean, ignore_case = ignore_case)

        if best_ratio is None or ratio > best_ratio:
            best_ratio = ratio
            best_texts = texts

    if best_ratio < find_best_control_match_cutoff:
        raise MatchError(items = list(name_control_map), tofind = search_text)

    return [name_control_map[best_text] for best_text in best_texts]
0495
0496
0497
0498
0499
0500
0501
0502
0503
0504
0505
0506
0507
0508
0509
0510
0511
0512
0513
0514
0515
0516
0517
0518
0519
0520
0521
0522
0523
0524
0525
0526
0527
0528
0529
0530
0531
0532