# This is a quick implementation of a test to detect the hispanic origin
# of a given last name, as described in Technical Working Paper No. 13 (part
# 7.1.3, Orthographic Structure of Surnames) of the Population Division of the
# U.S. Census Bureau.
#
#
# Copyright (c) 2007 Jordi S. Bunster
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

class String
  # Heuristically decides whether this string, taken as a last name, has the
  # orthographic structure of a Hispanic surname.
  #
  # The name is lowercased and its accented letters are folded to plain ASCII,
  # then it is checked in this order:
  #
  # 1. a name listed in COMMON_EXCEPTIONS is accepted outright;
  # 2. a name containing any entry of INVALID_SUBSTRINGS is rejected;
  # 3. unless the name ends in 'pi', its last three letters must be listed in
  #    VALID_STARTS;
  # 4. unless the name begins with 'pi', its first three letters must be
  #    listed in VALID_ENDS.
  #
  # The receiver itself is never modified. A name shorter than three letters
  # has no three-letter ending to look up, so it fails step 3 unless that
  # step is skipped.
  #
  # Returns true when every applicable test passes, false otherwise.
  def hispanic?
    # Normalize: lowercase, then fold each accent family onto its base letter.
    # When the replacement is shorter than the source set, String#tr pads it
    # with its last character, so every accent in a set maps to that letter.
    # Uppercase accents stay listed in case downcase left any untouched.
    lastname = downcase
               .tr('áàâäÄÂÀÁãÃ', 'a')
               .tr('éèêëËÊÈÉ', 'e')
               .tr('íìîïÏÎÌÍ', 'i')
               .tr('óòôöÖÔÒÓõÕ', 'o')
               .tr('úùûüÜÛÙÚ', 'u')
               .tr('ñÑ', 'n')
               .tr('çÇ', 'c')

    # Known names that would fool the tests below are accepted up front.
    return true if COMMON_EXCEPTIONS.include?(lastname)

    # A single forbidden letter or doubled letter rules the name out.
    return false if INVALID_SUBSTRINGS.any? { |bad| lastname.include?(bad) }

    # NOTE(review): the constant names read backwards relative to their use:
    # VALID_STARTS is consulted for the *ending* and VALID_ENDS for the
    # *beginning*. Presumably the constants are misnamed rather than misused
    # (VALID_STARTS holds typical endings such as 'nez' and 'rez'); confirm
    # before renaming anything.

    # A name ending in 'pi' is exempt from the ending test; otherwise its
    # last three letters must be listed (slice yields nil for short names,
    # which is never listed).
    return false unless lastname.end_with?('pi') ||
                        VALID_STARTS.include?(lastname.slice(-3, 3))

    # A name beginning with 'pi' is exempt from the beginning test; otherwise
    # its first three letters must be listed.
    return false unless lastname.start_with?('pi') ||
                        VALID_ENDS.include?(lastname.slice(0, 3))

    true # every applicable test passed
  end

  ##### BEWARE: Ugly large arrays from now on, and nothing more. :)

  # Last names that hispanic? accepts outright: the normalized name is
  # compared against this list before any other test runs. Each entry contains
  # a doubled letter ('aa', 'tt', 'ss') that INVALID_SUBSTRINGS would reject.
  COMMON_EXCEPTIONS = %w[saavedra cotto jasso delossantos]

  # Single letters and doubled letters that disqualify a last name: hispanic?
  # returns false when the normalized name contains any one of these, unless
  # the name was already accepted as a common exception.
  INVALID_SUBSTRINGS = %w[
    w k tt nn aa mm bb cc dd ee ff gg hh ii jj oo pp qq ss uu vv xx yy zz
  ]

  VALID_STARTS = [ 'aal','aba','abe','abi','abo','abu','aca','ace','ach',
  'aci','aco','ada','ade','adi','ado','ady','ael','aen','aes','aex','aez',
  'afa','afe','aga','ago','agu','aig','ain','aio','ais','ait','aiz','aja',
  'ajo','ala','ald','ale','ali','all','alo','als','alt','alu','aly','alz',
  'ama','ame','amo','amp','ana','and','ane','ani','ano','ans','ant','any',
  'anz','aos','apa','ape','apo','ara','ard','are','ari','aro','arr','ars',
  'art','ary','arz','asa','ash','asi','aso','asp','ass','asu','ata','ate',
  'ath','ati','ato','ats','att','atu','aua','aud','aue','aul','aun','aur',
  'aus','aut','aux','auz','ava','ave','avo','aya','ayo','aza','azo','azu',
  'bad','bal','ban','bao','bar','bas','bat','bau','bay','baz','bea','bel',
  'ben','beo','ber','bes','bet','beu','bey','bez','bia','bie','bil','bin',
  'bio','bis','biz','bla','blo','boa','bol','bon','boo','bor','bos','bot',
  'boy','boz','bra','bre','bro','bua','buz','cal','can','car','cas','cay',
  'caz','cca','cea','ced','cel','cen','ceo','cer','ces','cez','cha','che',
  'cho','chy','chz','cia','cid','cil','cin','cio','cis','ciz','coa','com',
  'con','cor','cos','cot','coy','coz','cre','cri','cto','cua','cud','cue',
  'cui','cun','cur','cut','cuy','cuz','dad','dal','dan','dar','das','day',
  'daz','dea','dei','del','deo','der','des','det','deu','dey','dez','dia',
  'die','dil','din','dio','dip','dis','dit','diz','dlo','dma','doa','doc',
  'dol','don','dor','dos','dot','doy','doz','dra','dre','dro','dua','due',
  'dui','duy','duz','eal','ean','ear','eas','eaz','eba','ebo','eca','ech',
  'eco','eda','ede','edo','ega','ego','egu','eia','ein','eis','eja','ejo',
  'ela','eli','ell','elo','ely','ema','eme','emo','emy','ena','ene','eng',
  'eni','eno','ens','ent','eny','enz','eon','eos','eoz','epa','epe','epo',
  'era','ern','ero','err','ers','ert','eru','erz','esa','ese','esi','eso',
  'ess','esu','eta','ete','eto','ets','ety','eus','eut','euz','eva','eve',
  'evo','eya','eyo','eza','ezo','ezu','fan','far','fas','fat','fau','fay',
  'faz','feo','fex','fez','ffa','ffi','ffo','fil','fin','fio','fiz','foa',
  'fos','fox','foz','fra','fre','fuz','gal','gan','gar','gas','gat','gau',
  'gay','gaz','gdo','gea','gel','gen','ger','ges','get','gez','gia','gil',
  'gin','gio','giz','gle','goa','gol','gon','gor','gos','got','goy','goz',
  'gra','gre','gro','gua','gue','gui','gul','gun','guo','gur','gus','guy',
  'guz','han','har','has','haz','hea','hel','heo','her','hes','het','heu',
  'hez','hia','hil','hin','hio','his','hiz','hoa','hon','hor','hos','hot',
  'hoz','hua','hue','hui','huz','ial','ian','iar','ias','iay','iaz','iba',
  'ibe','ibu','ica','ice','ico','ida','ide','idi','ido','iel','ien','ier',
  'ies','iez','ifa','ife','iga','igo','igu','ija','ijo','ila','ile','ill',
  'ilo','ils','ilu','ily','ima','ime','imo','ina','ine','ini','ino','ins',
  'inz','iol','ion','ios','ioz','ipa','ipe','ipi','ipo','ira','ire','iri',
  'iro','iru','isa','iso','isu','ita','ite','iti','ito','its','itu','ity',
  'itz','ius','iuz','iva','ive','ivo','ixa','iya','iye','iza','izo','izu',
  'jal','jam','jan','jar','jas','jaz','jea','jel','jen','jer','jes','jez',
  'jia','jil','jio','jiz','joa','jol','jon','joo','jos','joz','jua','jul',
  'lan','lao','lar','las','lat','lau','lay','laz','lba','lbe','lbi','lbo',
  'lca','lce','lco','lda','lde','ldi','ldo','ldu','lea','led','leg','lem',
  'len','leo','ler','les','let','lex','lez','lfo','lga','lgo','lia','lid',
  'lil','lin','lio','lis','liu','lix','liz','lja','ljo','lla','lle','lli',
  'llo','lls','lma','lme','lmo','lna','loa','lom','lon','lor','los','lot',
  'lou','loy','loz','lpa','lpi','lsa','lso','lta','lto','lua','lum','luo',
  'luz','lva','lve','lvi','lvo','lza','lzo','lzu','mal','man','mao','mar',
  'mas','mat','mau','may','maz','mba','mbo','mea','mei','mel','men','meo',
  'mer','mes','meu','mey','mez','mia','mil','min','mio','mir','mis','miz',
  'mlo','moa','mol','mon','mor','mos','mot','moz','mpa','mpo','mua','mus',
  'muy','muz','nal','nan','nao','nar','nar','nas','nat','nau','naz','nca',
  'nce','nco','nda','nde','ndi','ndo','ndz','nea','nel','neo','ner','nes',
  'net','nex','ney','nez','nfo','nga','nge','ngo','nia','nil','nin','nio',
  'nis','niz','nja','nje','njo','nna','nne','noa','nod','nol','non','nor',
  'nos','noz','npa','nsa','nso','nta','nte','nto','ntu','nty','nua','nue',
  'nur','nus','nuz','nva','nza','nzo','nzu','oal','oas','oaz','oba','obe',
  'obo','oca','oce','och','oco','oda','odo','odz','oea','oel','oen','oes',
  'oey','oez','oga','ogo','oig','oin','ois','oix','oiz','oja','ojo','ola',
  'oll','olo','ols','oma','ome','omo','oms','ona','one','ong','oni','ono',
  'ons','ont','opa','ope','opo','ora','ori','oro','ors','ort','ory','osa',
  'ose','oso','ota','ote','oto','oud','oul','ova','ove','ovi','ovo','oya',
  'oyo','oza','ozo','ozu','pal','pan','par','pas','paz','per','pes','pez',
  'pia','pin','pio','pis','piz','pla','poa','pol','pon','pos','pou','poy',
  'poz','ppa','pro','pua','pus','puz','qua','que','qui','rad','rai','ral',
  'ran','rao','rar','ras','rat','rau','ray','raz','rba','rbe','rbo','rca',
  'rce','rch','rco','rda','rde','rdi','rdo','rdu','rea','red','rel','ren',
  'reo','rer','res','ret','reu','rev','rew','rex','rey','rez','rfa','rfi',
  'rga','rge','rgi','rgo','ria','rib','rid','rie','rig','ril','rim','rin',
  'rio','ris','rit','riu','riz','rja','rjo','rla','rlo','rma','rme','rmo',
  'rna','rne','rni','rno','rns','rnu','roa','rol','ron','roo','ros','roy',
  'roz','rpa','rpi','rra','rre','rri','rro','rru','rry','rrz','rsa','rse',
  'rso','rta','rte','rth','rti','rto','rtu','rty','rua','rue','rui','rur',
  'rus','ruz','rva','rvo','rza','rze','rzo','rzu','saa','sad','sal','sar',
  'sas','saz','sca','sch','sco','sea','sel','sen','seo','ser','ses','set',
  'sez','sga','sgo','sia','sil','sin','sio','sis','siz','sla','sle','sma',
  'sme','sno','soa','sol','son','sor','sos','soz','spe','spi','spo','ssa',
  'ssi','sso','ssy','sta','ste','sti','sto','stu','sty','sua','sud','sul',
  'sun','sus','suz','sva','tad','tal','tan','tao','tar','tas','tau','tay',
  'taz','tea','tel','ten','teo','ter','tes','teu','tey','tez','tga','tia',
  'tie','til','tin','tio','tir','tis','tiz','toa','tol','ton','tor','tos',
  'tot','tou','toy','toz','tra','tre','tro','try','tta','tte','tto','tts',
  'tty','tua','tud','tun','tur','tus','tuz','ual','uan','uar','uas','uay',
  'uba','ube','ubi','uca','uce','uch','uco','uda','udo','uea','ued','uel',
  'uen','ueo','uer','ues','uet','uez','ufe','ufo','uga','ugo','uia','uig',
  'uil','uin','uio','uis','uit','uiu','uiz','uja','ujo','ula','ule','uli',
  'ulo','uls','uma','ume','una','uno','unz','uon','uoz','upe','upo','ura',
  'ure','uri','uro','urt','uru','ury','urz','usa','uso','uta','ute','uto',
  'uva','uxo','uya','uyo','uza','uze','uzo','uzu','val','van','var','vas',
  'vat','vay','vaz','vdo','vea','vel','ven','veo','ver','ves','vet','vez',
  'via','vid','vie','vil','vin','vio','vis','viz','vjo','vle','voa','vol',
  'von','vor','vos','voz','vua','vuz','xar','xas','xia','xta','xto','yan',
  'yar','yas','yaz','yba','yco','yda','yde','yea','yen','yer','yes','yet',
  'yez','yja','ymi','yna','yni','yno','yoa','yol','yon','yor','yos','you',
  'yra','yre','yro','yta'