1 from utilities import get_classifier
2
# The docstrings in this listing use epytext field markup (@type, @param, @rtype, @return, L{...}).
3 __docformat__='epytext'
4
# NOTE(review): the def line for this body is not present in this listing — presumably __init__(self); confirm.
7 """
8 Constructor for an empty ruleset: no rules, and every numeric property starts at 0.0.
9 """
10 self.rules = []  # rules held by this ruleset, in the order they are appended
11 self.length = 0.0  # running total; the rule-appending code adds rule.length + 1 per rule
12 self.error = 0.0  # running total of rule.error over the appended rules
13 self.lenH = 0.0  # NOTE(review): only initialised in the visible code; meaning not shown here
14 self.coverage = 0.0  # NOTE(review): only initialised in the visible code
15 self.confidence = 0.0  # NOTE(review): only initialised in the visible code
16 self.lenEH = 0.0  # read by the description-length check as a term of the current length
17
# NOTE(review): the def line for this body is not present in this listing — presumably __str__(self); confirm.
19 """
20 String representation of the ruleset: "[", then str(rule) plus one space for each rule, then "]".
21 """
22 string = "["
23 for rule in self.rules:
# Every rule, including the last one, is followed by a space, so a non-empty
# ruleset ends in " ]" while an empty one yields "[]".
24 string = string + str(rule) + " "
25 string += "]"
26 return string
27
# NOTE(review): the def line for this body is not present in this listing; the method name is not visible — confirm.
29 """
30 Append a rule to the ruleset and update the ruleset's C{length} and C{error} totals.
31
32 @type rule: L{Rule}
33 @param rule: rule to be added; its C{length} and C{error} attributes are read.
34 """
35 self.rules.append(rule)
# One extra unit is added per appended rule on top of the rule's own length.
36 self.length += rule.length + 1
37 self.error += rule.error
# NOTE(review): lenH, coverage, confidence and lenEH are left untouched here.
38
# NOTE(review): the def line for this body is not present in this listing, so the
# method name, parameter defaults and any decorator cannot be seen — confirm.
40 """
41 Print out the rules, one line per rule, recursing into each rule's exceptions.
42
43 Can be used with Python 2.6+ (the summary text is built with C{str.format}).
44
45 @type ruleset: L{Ruleset}
46 @param ruleset: rules to output.
47 @type depth: integer
48 @param depth: Current depth; each recursion into a rule's exceptions passes C{depth+1}.
49 @type pos_symbol: string
50 @param pos_symbol: Symbol signifying positive classification (passed to C{get_classifier}).
51 @type neg_symbol: string
52 @param neg_symbol: Symbol signifying negative classification (passed to C{get_classifier}).
53 """
# Position of the current rule among the rules printed by this call.
54 counter = 0
# Three spaces of indentation per depth level.
55 space = (depth * 3) * " "
# Label printed after "->"; computed once per call, so it is the same for every rule at this depth.
56 classifier = get_classifier(depth, pos_symbol, neg_symbol)
57 for rule in ruleset.rules:
58 exceptions = rule.exceptions
# Depth 0 prints no prefix; at deeper levels the first rule is introduced by
# "unless" and every following one by "or".
59 if depth == 0:
60 pass
61 elif counter == 0:
62 print space + "unless",  # trailing comma: Python 2 print stays on the same output line
63
64 else:
65 print space + "or",
66 print rule.name + " -> "+classifier,
# Rules with no exceptions print coverage and error only; rules with exceptions
# additionally print rule.get_single_error() labelled "without exceptions".
67 if len(exceptions.rules) == 0:
68 print " (Coverage {0:.0f}, error {1:3.1f}%)"\
69 .format(rule.coverage, rule.get_error())
70 else:
71 print " (Coverage {0:.0f}, error {1:3.1f}%, without exceptions {2:3.1f}%)"\
72 .format(rule.coverage, rule.get_error(), rule.get_single_error())
# NOTE(review): pos_symbol and neg_symbol are not forwarded in this recursive
# call, so nested levels rely on whatever defaults Ruleset.print_rule_tree
# declares — confirm this is intended.
73 Ruleset.print_rule_tree(exceptions, depth+1)
74 counter += 1
75
# NOTE(review): the def line for this body is not present in this listing, so the
# method name, parameter defaults and any decorator cannot be seen — confirm.
77 """
78 Print out the rules, one line per rule, recursing into each rule's exceptions (%-formatting variant).
79
80 @type ruleset: L{Ruleset}
81 @param ruleset: rules to output.
82 @type depth: integer
83 @param depth: Current depth; each recursion into a rule's exceptions passes C{depth+1}.
84 @type pos_symbol: string
85 @param pos_symbol: Symbol signifying positive classification (passed to C{get_classifier} and to the recursive call).
86 @type neg_symbol: string
87 @param neg_symbol: Symbol signifying negative classification (passed to C{get_classifier} and to the recursive call).
88 """
# Position of the current rule among the rules printed by this call.
89 counter = 0
# Three spaces of indentation per depth level.
90 space = (depth * 3) * " "
# Label printed after "->"; computed once per call, so it is the same for every rule at this depth.
91 classifier = get_classifier(depth, pos_symbol, neg_symbol)
92 for rule in ruleset.rules:
93 exceptions = rule.exceptions
# Depth 0 prints no prefix; at deeper levels the first rule is introduced by
# "unless" and every following one by "or".
94 if depth == 0:
95 pass
96 elif counter == 0:
97 print space + "unless",  # trailing comma: Python 2 print stays on the same output line
98 else:
99 print space + "or",
100 print rule.name + " -> "+classifier,
# "%" binds tighter than "+", so each fragment is formatted with its own value
# before the pieces are concatenated; the literal "%" signs are appended as
# separate strings rather than escaped inside a format string.
101 if len(exceptions.rules) == 0:
102 print " (Coverage %.0f" % (rule.coverage) + \
103 ", error %3.1f" % rule.get_error() +"%)"
104 else:
105 print " (Coverage %.0f" % rule.coverage + \
106 ", error %3.1f" % rule.get_error() +"%" + \
107 ", without exceptions %3.1f" % (rule.get_single_error()) +"%)"
# Exceptions are printed one level deeper with the same two symbols.
108 Ruleset.print_rule_tree(exceptions, depth+1, pos_symbol, neg_symbol)
109 counter += 1
110
# NOTE(review): the def line for this body is not present in this listing; the
# name count_rules is taken from the recursive call below — confirm.
112 """
113 Count the number of rules in a ruleset, including, recursively, the rules in every rule's exceptions.
114
115 @type ruleset: L{Ruleset}
116 @param ruleset: ruleset whose rules are to be counted.
117 @rtype: integer
118 @return: number of rules in the ruleset plus all rules nested in exceptions; 0 when C{ruleset.rules} is empty.
119 """
120 counter = 0
121 for rule in ruleset.rules:
# One for the rule itself ...
122 counter += 1
# ... plus everything nested under its exceptions ruleset.
123 counter += Ruleset.count_rules(rule.exceptions)
124 return counter
125
# NOTE(review): the def line for this body is not present in this listing; the method name is not visible — confirm.
127 """
128 Check if adding a rule improves the ruleset by shortening its description length. The ruleset is not modified.
129
130 @type rule: L{Rule}
131 @param rule: rule to be added; its C{length} and C{lenEH} attributes are read.
132 @type datapoint_bits: float
133 @param datapoint_bits: bits needed to encode one data point.
134 @type c: float
135 @param c: multiplier applied to the length terms (C{self.length} and C{rule.length + 1}) in both totals.
136 @rtype: boolean
137 @return: True if adding the rule to ruleset shortens its description length.
138 """
# Current total: weighted ruleset length plus the ruleset's lenEH.
139 cur_len = self.length * c + self.lenEH
# Candidate total: the length grows by rule.length + 1 (the same increment the
# rule-appending code applies), then rule.lenEH and self.error * datapoint_bits are added.
# NOTE(review): this uses rule.lenEH and self.error where cur_len uses
# self.lenEH and no error term — confirm the asymmetry is intended.
140 new_len = (rule.length + 1 + self.length) * c + rule.lenEH + self.error * datapoint_bits
# Strict comparison: an equal description length does not count as an improvement.
141 return (new_len < cur_len)
142