Module ruleset
[hide private]
[frames] | no frames]

Source Code for Module ruleset

import sys

from utilities import get_classifier
  2   
  3  __docformat__='epytext' 
  4   
class Ruleset:
    """
    Ordered collection of rules together with running totals.

    C{length} and C{error} are accumulated by L{append}. The remaining
    numeric attributes (C{lenH}, C{coverage}, C{confidence}, C{lenEH}) are
    only initialised here and are expected to be maintained by callers.

    L{print_rule_tree_2_6}, L{print_rule_tree} and L{count_rules} take the
    ruleset to operate on as their first argument and are invoked through
    the class, e.g. C{Ruleset.count_rules(ruleset)}.
    """

    def __init__(self):
        """
        Constructor for an empty ruleset.
        """
        self.rules = []
        self.length = 0.0
        self.error = 0.0
        self.lenH = 0.0
        self.coverage = 0.0
        self.confidence = 0.0
        self.lenEH = 0.0

    def __str__(self):
        """
        String representation of the ruleset.

        @rtype: string
        @return: every rule's string form followed by a single space, all
            wrapped in square brackets.
        """
        return "[" + "".join(str(rule) + " " for rule in self.rules) + "]"

    def append(self, rule):
        """
        Append a rule to the ruleset and modify ruleset's properties accordingly.

        The ruleset's length grows by the rule's length plus one and its
        error grows by the rule's error; no other attribute is updated.

        @type rule: L{Rule}
        @param rule: rule to be added.
        """
        self.rules.append(rule)
        self.length += rule.length + 1
        self.error += rule.error

    def print_rule_tree_2_6(ruleset, depth, pos_symbol, neg_symbol):
        """
        Print out the rules, one per line, followed by their exceptions.

        Can be used with Python 2.6+ (relies on C{str.format}).

        @type ruleset: L{Ruleset}
        @param ruleset: rules to output.
        @type depth: integer
        @param depth: Current depth.
        @type pos_symbol: string
        @param pos_symbol: Symbol signifying positive classification.
        @type neg_symbol: string
        @param neg_symbol: Symbol signifying negative classification.
        """
        indent = (depth * 3) * " "
        classifier = get_classifier(depth, pos_symbol, neg_symbol)
        for index, rule in enumerate(ruleset.rules):
            exceptions = rule.exceptions
            # Top-level rules carry no keyword; nested ones read as
            # "unless <first exception>" / "or <further exception>".
            if depth == 0:
                prefix = ""
            elif index == 0:
                prefix = indent + "unless "
            else:
                prefix = indent + "or "
            if not exceptions.rules:
                stats = " (Coverage {0:.0f}, error {1:3.1f}%)"\
                    .format(rule.coverage, rule.get_error())
            else:
                stats = " (Coverage {0:.0f}, error {1:3.1f}%, without exceptions {2:3.1f}%)"\
                    .format(rule.coverage, rule.get_error(), rule.get_single_error())
            # sys.stdout.write keeps the output identical on Python 2 and 3;
            # the separating spaces are the ones the old trailing-comma
            # print statements inserted.
            sys.stdout.write(prefix + rule.name + " -> " + classifier + " " + stats + "\n")
            # Recurse with both symbols so the exceptions can be classified.
            Ruleset.print_rule_tree_2_6(exceptions, depth + 1, pos_symbol, neg_symbol)

    def print_rule_tree(ruleset, depth, pos_symbol, neg_symbol):
        """
        Print out the rules, one per line, followed by their exceptions.

        Uses %-formatting only, so it does not depend on C{str.format}.

        @type ruleset: L{Ruleset}
        @param ruleset: rules to output.
        @type depth: integer
        @param depth: Current depth.
        @type pos_symbol: string
        @param pos_symbol: Symbol signifying positive classification.
        @type neg_symbol: string
        @param neg_symbol: Symbol signifying negative classification.
        """
        indent = (depth * 3) * " "
        classifier = get_classifier(depth, pos_symbol, neg_symbol)
        for index, rule in enumerate(ruleset.rules):
            exceptions = rule.exceptions
            # Top-level rules carry no keyword; nested ones read as
            # "unless <first exception>" / "or <further exception>".
            if depth == 0:
                prefix = ""
            elif index == 0:
                prefix = indent + "unless "
            else:
                prefix = indent + "or "
            stats = " (Coverage %.0f" % rule.coverage + \
                ", error %3.1f" % rule.get_error() + "%"
            if exceptions.rules:
                stats += ", without exceptions %3.1f" % rule.get_single_error() + "%"
            stats += ")"
            # sys.stdout.write keeps the output identical on Python 2 and 3;
            # the separating spaces are the ones the old trailing-comma
            # print statements inserted.
            sys.stdout.write(prefix + rule.name + " -> " + classifier + " " + stats + "\n")
            Ruleset.print_rule_tree(exceptions, depth + 1, pos_symbol, neg_symbol)

    def count_rules(ruleset):
        """
        Count the number of rules in a ruleset, including nested exceptions.

        @type ruleset: L{Ruleset}
        @param ruleset: ruleset whose rules are to be counted.
        @rtype: integer
        @return: number of rules in the ruleset.
        """
        # Each rule counts once, plus everything in its exception ruleset.
        return sum(1 + Ruleset.count_rules(rule.exceptions)
                   for rule in ruleset.rules)

    def improved_by(self, rule, datapoint_bits, c):
        """
        Check if adding a rule improves the ruleset by shortening its description length.

        @type rule: L{Rule}
        @param rule: rule to be added.
        @type datapoint_bits: float
        @param datapoint_bits: bits needed to encode one data point.
        @type c: float
        @param c: multiplier applied to the length term of both the current
            and the candidate description length.
        @rtype: boolean
        @return: True if adding the rule to ruleset shortens its description length.
        """
        cur_len = self.length * c + self.lenEH
        # NOTE(review): the candidate length uses rule.lenEH plus this
        # ruleset's error encoded at datapoint_bits per point, while the
        # current length uses self.lenEH -- confirm this asymmetry is intended.
        new_len = (rule.length + 1 + self.length) * c + rule.lenEH + self.error * datapoint_bits
        return new_len < cur_len