# Builds the three global patterns that find_article consults.
#
# Each exceptions file holds one regular-expression fragment per line (an
# entry may carry its own anchor, e.g. "U$"). The fragments are OR-ed
# together, anchored at the start of the word and matched case-insensitively.
build_exc = lambda do |path|
  # Trim every line (this also removes the "\r" of CRLF files) and skip blank
  # lines: an empty alternative would make the pattern match every word.
  fragments = IO.read(path).split("\n").map { |line| line.strip }.reject { |line| line.empty? }
  # With no fragments at all, fall back to a pattern that can never match.
  # No "o" flag here: it would freeze the first interpolation and hand the
  # same pattern back for both files.
  fragments.empty? ? /(?!)/ : %r{^(#{fragments.join("|")})}i
end
$v_exc = build_exc.call("vowel_exceptions")
$c_exc = build_exc.call("consonant_exceptions")
# Default rule: the word starts with a vowel letter. Case-insensitive, like
# the exception patterns, so capitalised words ("Apple") are not mistaken for
# consonant-initial ones.
$def_rule = %r{^(a|e|i|o|u)}i
# Picks the English indefinite article for +word+.
#
# Relies on three global patterns set up outside this method:
#   $c_exc    - a match forces "an" regardless of the other two;
#   $def_rule - the default rule; without a match the answer is "a";
#   $v_exc    - a match vetoes the default rule and yields "a".
#
# Returns the String "an" or "a".
def find_article(word)
  return "an" if word =~ $c_exc
  return "a" unless word =~ $def_rule

  word =~ $v_exc ? "a" : "an"
end
# Smoke test: print every sample word preceded by the article chosen for it.
samples = %w{test cow hour union solid aardvark one-sided honourable onion euphoric u s x MIT AAA f i }
samples.each { |word| puts "#{find_article(word)} #{word}" }
$ ruby a_an.rb
a test
a cow
an hour
a union
a solid
an aardvark
a one-sided
a honourable
an onion
a euphoric
a u
an s
an x
an MIT
a AAA
an f
an i
AAA
EUAN
EUBANK
EUCALYPTI
EUCALYPTUS
EUCHARIST
EUCHRE
EUCHRED
EUCLEA
EUCLID
EUDOCIA
EUDORA
EUDOSIA
EUDOXIA
EUDY
EUGEN
EULA
EULICH
EULOG
EUNICE
EUPHEMI
EUPHOR
EUPHRATES
EURASIA
EURECA
EUREKA
EUREST
EURO
EURY
EUSEBIO
EUSTAC
EUSTICE
EUSTIS
EUTHANASIA
EUTHANIZE
EUTSEY
EUTSLER
EWALD
EWALT
EWAN
EWART
EWBAL
EWBANK
EWE
EWELL
EWEN
EWER
EWERT
EWES
EWIG
EWING
EWOLDT
EWTON
EWY
ONCE
ONE
OUI
OUIMET
U$
UARCO
UART
UBER
UBIQUIT
UDALE
UDALL
UDY
UEBERROTH
UEDA
UEKI
UENO
UGANDA
UGOLIN
UGRIC
UKASE
UKRAINE
UKULELE
ULA
ULAM
ULAND
ULI
ULIN
ULITSA
ULLENBERG
ULLYSES
ULUNDI
ULYSSES
UMASS
UMETSU
UNABOM
UNANIM
UNESCO
UNI
UNOCAL
UNOSOM
UNUM
URAL
URAM
URANIUM
URANUS
URE$
UREA
UREMIA
URETH
UREY
URI
URIAN
URIARTE
URIAS
URIBE
URIC
URIE
URIEGAS
URIN
URIOSTE
UROKINASE
UROLOG
URUGUAY
URY
USA
USABLE
USAFE
USAGE
USAID
USAIR
USAMERIBANCS
USBANCORP
USE
USINES
USING
USINOR
USTRUST
USUAL
USURP
USURY
UTAH
UTAMARO
UTECH
UTENSILS
UTERINE
UTERO
UTERUS
UTHER
UTICA
UTILI
UTOPIA
UVA
UWE
UYS
CABOK
F$
FM
H$
HBOX
HCES
HEIR
HENRIQUE
HERB
HFDF
HGH
HIAA
HIPOLITO
HOMAGE
HONEST
HONOR
HOUR
L$
LAPD
LCS
LP
LPN
LS
LSD
M$
MBANK
MCORP
MFUME
MGM
MH
MHM
MIT
MPEG
MPG
MPH
MTEL
N$
NDAU
NG
NGEMA
NGOR
NIC
NJT
NPR
NTH
NVHOME
NVRYAN
R$
RB
RPF
RPM
RRROT
RZASA
RZEPKA
S$
SBF
SDN
SH
STDS
SUU
X$
XAVIERA
XERS
XSCRIBE
XTRA
YBANEZ
YBARBO
YBARRA
YGLESIAS
YNIGUEZ
YPSILANTI
YQUEM
YSLETA
YTTRIUM
YVES
YVETTE
YVONNE
YZAGUIRRE
ABATING AH0 B EY1 T IH0 NG
ABBA AE1 B AH0
ABBADO AH0 B AA1 D OW0
ABBAS AH0 B AA1 S
ABBASI AA0 B AA1 S IY0
ABBATE AA1 B EY0 T
# Scans 'cmudict.0.6d' line by line and prints the headword of every entry
# that passes the active filter. To generate one of the other lists, replace
# the two guard clauses with one of the commented-out conditions.
#
# NOTE(review): the patterns expect exactly one space between the headword and
# the first pronunciation code; confirm this matches the dictionary file.
File.foreach('cmudict.0.6d') do |line|
  # vowel-exceptions: the entry starts with a vowel letter, but the text after
  # the headword does not start with one of the listed two-letter codes
  # (presumably the dictionary's vowel phonemes; confirm).
  next unless line =~ /^(A|E|I|O|U)/
  next unless line =~ /^([A-Z]+) (?!(AA|AE|AH|AO|AW|AY|EH|ER|EY|IH|IY|OW|OY|UH|UW)).*/
  # consonant-exceptions
  # next unless !(line =~ /^(A|E|I|O|U)/) && (line =~ /^([A-Z]+) (AA|AE|AH|AO|AW|AY|EH|ER|EY|IH|IY|OW|OY|UH|UW).*/)
  # uni
  # next unless line =~ /^([A-Z]+)/ && line =~ /^(U.*)\s/

  # Group 1 of the most recent successful match.
  puts Regexp.last_match(1)
end
Another advantage of baking in the articles, one that most people seem to overlook, is that you don't have to set flags for when 'the' should be used or when an article must be omitted. For example: "You drop Belial's military sword." "You attack the goblin king."
Stripping articles under the hood takes as little effort as adding them, while the logic is more straightforward. Then again, few people are gifted with the kind of objectivity that comes naturally to me.