# Spells out symbols and punctuation as English words and blanks out the
# remaining punctuation, e.g. "a & b" becomes "a and b" and "foo.bar"
# becomes "foo dot bar".
#
# The receiver is not modified; a new String is returned.
#
# @return [String] the receiver's text with symbols replaced by words
def convert_misc_characters
  # Ordered [pattern, replacement] pairs. Order matters: the "amount with
  # cents" currency rules must run before the generic dot rule, which would
  # otherwise split "$1.50" at the decimal point.
  rules = [
    [/(\s|^)\$(\d+)\.(\d+)(\s|$)/, '\2 dollars \3 cents'],
    [/(\s|^)£(\d+)\.(\d+)(\s|$)/u, '\2 pounds \3 pence'],
    [/\s*&\s*/, "and"],
    [/\s*#/, "number"],
    [/\s*@\s*/, "at"],
    # Matches consume the character after the dot, so in "a.b.c" only the
    # first dot is spelled out; the second is blanked by the final cleanup.
    [/(\S|^)\.(\S)/, '\1 dot \2'],
    [/(\s|^)\$(\d*)(\s|$)/, '\2 dollars'],
    [/(\s|^)£(\d*)(\s|$)/u, '\2 pounds'],
    [/(\s|^)¥(\d*)(\s|$)/u, '\2 yen'],
    [/\s*\*\s*/, "star"],
    [/\s*%\s*/, "percent"],
    [/\s*(\\|\/)\s*/, "slash"],
    # This replacement is already padded and is padded again below, so "="
    # ends up surrounded by two spaces on each side.
    [/(\s*=\s*)/, " equals "],
    [/\s*\+\s*/, "plus"]
  ]

  # Runs of three or more dots are spelled out before any other rule.
  spelled = rules.inject(gsub(/\.{3,}/, " dot dot dot ")) do |text, (pattern, words)|
    # Replacements that reference \1 re-emit the captured text themselves;
    # every other replacement is padded with a space on each side.
    padded = words.include?('\1') ? words : " #{words} "
    text.gsub(pattern, padded)
  end

  # Drop apostrophes that touch a word character ("don't" -> "dont"), then
  # turn the remaining punctuation into spaces.
  spelled.gsub(/(^|\w)'(\w|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'"_]/, " ")
end