|
From: <dwa...@us...> - 2007-05-22 05:34:27
|
Revision: 9154
http://zaf.svn.sourceforge.net/zaf/?rev=9154&view=rev
Author: dwaynebailey
Date: 2007-05-21 22:34:23 -0700 (Mon, 21 May 2007)
Log Message:
-----------
Add ability to create hunspell dictionaries. Mostly copied from MySpell with makealias added to provide compression of the affix rules themselves.
Modified Paths:
--------------
trunk/dict/utils/Makefile.language
Added Paths:
-----------
trunk/dict/utils/hunmakealias
Modified: trunk/dict/utils/Makefile.language
===================================================================
--- trunk/dict/utils/Makefile.language 2007-05-22 01:53:50 UTC (rev 9153)
+++ trunk/dict/utils/Makefile.language 2007-05-22 05:34:23 UTC (rev 9154)
@@ -14,7 +14,7 @@
LANG_FAKE?=$(LANG)
REGION_FAKE?=$(REGION)
-.PHONY: all clean clean-myspell clean-ispell clean-aspell clean-autocorr ispell myspell aspell count check wordlist autocorr
+# Targets named here are commands rather than files; hunspell and clean-hunspell are declared alongside the other spell-checker targets.
+.PHONY: all clean clean-myspell clean-ispell clean-aspell clean-hunspell clean-autocorr ispell myspell aspell hunspell count check wordlist autocorr
TARGETS_MYSPELL=myspell/wordlist.$(CHARSET) myspell/pack-$(LANG)-$(REGION)-$(VERSION).zip myspell/$(LANG)_$(REGION).zip myspell/$(LANG)_$(REGION).dic myspell/spell.txt myspell/VERSION_$(LANG)_$(REGION).txt myspell/install.js myspell/spell-$(LANG)-$(REGION).xpi myspell/$(LANG)-$(REGION).dic myspell/$(LANG)-$(REGION).aff myspell/README-$(LANG)-$(REGION).txt myspell/xpi2
@@ -23,6 +23,8 @@
TARGET_ASPELL=aspell/wordlist.$(CHARSET) aspell/$(LANG).rws aspell/aspell-$(LANG)-$(VERSION_ASPELL)-$(VERSION).tar.bz2 aspell/proc aspell/configure aspell/$(LANG).cwl aspell/info aspell/Makefile aspell/Makefile.pre aspell/$(LANG).* aspell/*.alias aspell/COPYING aspell/README aspell/aspell-$(LANG)-$(VERSION_ASPELL)-$(VERSION)
+# Generated hunspell files that clean-hunspell deletes. The two zip names must
+# match the hunspell/hunspell-*.zip targets built by the hunspell rules,
+# otherwise the built archives survive a clean.
+TARGETS_HUNSPELL=hunspell/wordlist.$(CHARSET) hunspell/hunspell-pack-$(LANG)-$(REGION)-$(VERSION).zip hunspell/hunspell-$(LANG)_$(REGION)-$(VERSION).zip hunspell/$(LANG)_$(REGION).dic hunspell/$(LANG)_$(REGION)_unalias.dic hunspell/$(LANG)_$(REGION).aff hunspell/spell.txt hunspell/VERSION_$(LANG)_$(REGION).txt
+
TARGET_AUTOCORR=acor/acor_$(LANG)-$(REGION).dat*
TARGET_WORDLISTS=wordlists/wordlist.$(CHARSET) $(WORDLIST.CHARSET)
@@ -30,10 +32,13 @@
MUNCH = ../utils/munch
MUNCH_DEP = $(MUNCH)
-all: myspell aspell wordlist acor
+# Path to the hunmunch tool. HUNMUNCH_DEP is the name rules use to list the
+# tool as a prerequisite of the files it generates.
+HUNMUNCH = ../utils/hunmunch
+HUNMUNCH_DEP = $(HUNMUNCH)
-clean: clean-myspell clean-ispell clean-aspell clean-wordlists clean-acor
+# Aggregate build target: hunspell is built together with myspell, aspell, the wordlist and acor.
+all: myspell aspell hunspell wordlist acor
+# Aggregate clean target: delegates to the per-component clean-* targets, including clean-hunspell.
+clean: clean-myspell clean-ispell clean-aspell clean-hunspell clean-wordlists clean-acor
+
# Statistics
count: wordlists/wordlist.$(CHARSET) $(WORDLIST.IN)
@@ -191,6 +196,46 @@
aspell/Makefile.pre: aspell/proc aspell/info
(cd aspell; export LC_COLLATE=C; ./proc create)
+# HunSpell rules
+
+# Remove every path listed in TARGETS_HUNSPELL (rm -rf, so missing files are not an error).
+clean-hunspell:
+ rm -rf $(TARGETS_HUNSPELL)
+
+# "make hunspell" builds the two zip files: the standard dictionary package
+# and the offline-installation pack that contains it.
+hunspell: hunspell/hunspell-$(LANG)_$(REGION)-$(VERSION).zip hunspell/hunspell-pack-$(LANG)-$(REGION)-$(VERSION).zip
+
+# Package usable for offline installation: the standard dictionary zip plus
+# the spell.txt index. zip -j stores the members without their directory part.
+hunspell/hunspell-pack-$(LANG)-$(REGION)-$(VERSION).zip: hunspell/hunspell-$(LANG)_$(REGION)-$(VERSION).zip hunspell/spell.txt
+ zip -j $@ $^
+
+# Autogenerated index file: a single comma-separated line holding LANG_FAKE,
+# REGION_FAKE, LANG_FAKE_REGION_FAKE, "LANGNAME (REGIONNAME)" and the file
+# name of the standard dictionary zip.
+# NOTE(review): the rule has no prerequisites, so an existing spell.txt is not
+# regenerated when VERSION or the language variables change.
+hunspell/spell.txt:
+ echo '$(LANG_FAKE),$(REGION_FAKE),$(LANG_FAKE)_$(REGION_FAKE),$(LANGNAME) ($(REGIONNAME)),hunspell-$(LANG)_$(REGION)-$(VERSION).zip' >$@
+
+# Standard hunspell package: the aliased .dic and .aff files plus the README
+# and VERSION text files, stored without their directory part (zip -j).
+hunspell/hunspell-$(LANG)_$(REGION)-$(VERSION).zip: hunspell/$(LANG)_$(REGION).dic hunspell/$(LANG)_$(REGION).aff hunspell/README_$(LANG)_$(REGION).txt hunspell/VERSION_$(LANG)_$(REGION).txt
+ zip -j $@ $^
+
+# Ship VERSION under a per-locale file name so that it cannot collide with
+# the VERSION file of another package; cp -p keeps the original timestamp.
+hunspell/VERSION_$(LANG)_$(REGION).txt: VERSION
+ cp -p $< $@
+
+# hunspell/ispell don't like the header, or words with spaces in them, so
+# drop every line that starts with '#' or contains a space.
+# grep -E is the standard spelling of the obsolescent egrep command; the
+# extended-regex matching is the same.
+hunspell/wordlist.$(CHARSET): wordlists/wordlist.$(CHARSET)
+ grep -E -v '(^#| )' <$< >$@
+
+# Convert the filtered wordlist into the un-aliased hunspell dictionary by
+# running hunmunch with the wordlist (first prerequisite) and the un-aliased
+# affix file (second prerequisite). This step can take a while.
+hunspell/$(LANG)_$(REGION)_unalias.dic: hunspell/wordlist.$(CHARSET) hunspell/$(LANG)_$(REGION)_unalias.aff $(HUNMUNCH_DEP)
+ $(HUNMUNCH) $< $(word 2,$^) >$@
+
+# The aliased .dic is written by the recipe of the .aff rule below, so this
+# rule only records that dependency and has no recipe of its own.
+hunspell/$(LANG)_$(REGION).dic: hunspell/$(LANG)_$(REGION).aff
+
+# Compress the un-aliased .dic/.aff pair with hunmakealias. The script writes
+# its two *_unalias_alias.* results into the current directory; they are then
+# moved into hunspell/ under their final names.
+hunspell/$(LANG)_$(REGION).aff: hunspell/$(LANG)_$(REGION)_unalias.aff hunspell/$(LANG)_$(REGION)_unalias.dic
+ ../utils/hunmakealias hunspell/$(LANG)_$(REGION)_unalias.dic hunspell/$(LANG)_$(REGION)_unalias.aff
+ mv $(LANG)_$(REGION)_unalias_alias.dic hunspell/$(LANG)_$(REGION).dic
+ mv $(LANG)_$(REGION)_unalias_alias.aff hunspell/$(LANG)_$(REGION).aff
+
# Autocorrect rules
# Naming format for OpenOffice.org 2
Added: trunk/dict/utils/hunmakealias
===================================================================
--- trunk/dict/utils/hunmakealias (rev 0)
+++ trunk/dict/utils/hunmakealias 2007-05-22 05:34:23 UTC (rev 9154)
@@ -0,0 +1,92 @@
+#!/bin/sh
+# makealias: make alias compressed dic and aff files
+# Usage: makealias file.dic file.aff (not makealias file.aff file.dic!)
+#
+# Writes two files into the current directory, named after the basenames of
+# the arguments: <dic>_alias.dic and <aff>_alias.aff. Every distinct flag
+# string is replaced by its index in an AF table and every distinct value of
+# the second dictionary field / sixth affix-rule field by its index in an AM
+# table. Both tables are placed after the FLAG line(s) and before the
+# remaining affix rules.
+
+case $# in
+0|1)
+    echo 'makealias: make alias compressed dic and aff files
+Usage: makealias file.dic file.aff (not makealias file.aff file.dic!)' >&2
+    # a usage error must not look like success to the caller
+    exit 1;;
+esac
+
+DIC=`basename "$1" .dic`
+AFF=`basename "$2" .aff`
+OUT_AFF="${AFF}_alias.aff"
+# scratch file for the rewritten affix rules; made unique with the shell PID
+TMP_AFF="${AFF}_alias.$$"
+
+# FLAG type definition must be before alias definitions
+grep '^FLAG' "$2" >"$OUT_AFF"
+
+# awk prints the rewritten dictionary on stdout and the rewritten affix lines
+# on stderr (captured in $TMP_AFF); the END block appends the AF/AM tables
+# directly to the output affix file, whose name is passed in as "out".
+awk -v out="$OUT_AFF" 'BEGIN{n=1;m=1}
+# return the part of st before the first "/", or st itself when it has none
+function cutslash(st,    parts) {
+    if (split(st,parts,"/") > 1) return parts[1]
+    return st
+}
+
+# dictionary line of the form word/flags, optionally followed by a second field
+FILENAME ~ /\.dic$/ && /^[^ ]*\/[^ ]/ {
+    split($1,t,"/")
+    if(!a[t[2]]){
+        a[t[2]]=n
+        b[n]=t[2]
+        n++
+    }
+    if(($0 ~ /\t.*[^\t ]/) && $2 && !a2[$2]){
+        a2[$2]=m
+        c[m]=$2
+        m++
+    }
+    print t[1]"/"a[t[2]] ($2 ? "\t" a2[$2] : "")
+    next
+}
+# dictionary line without flags but with a second field
+FILENAME ~ /\.dic$/ && $2 {
+    if(($0 ~ /\t.*[^\t ]/) && $2 && !a2[$2]){
+        a2[$2]=m
+        c[m]=$2
+        m++
+    }
+    print cutslash($1) "\t" a2[$2]
+    next
+}
+# any other dictionary line
+FILENAME ~ /\.dic$/ { print cutslash($1) }
+# affix rule whose fourth field carries flags after a slash
+FILENAME ~ /\.aff$/ && /^[PS]FX/ && ($4 ~ /\/[^ ]/) {
+    split($4,t,"/")
+    if(!a[t[2]]){
+        a[t[2]]=n
+        b[n]=t[2]
+        n++
+    }
+    if($6 && !a2[$6]){
+        a2[$6]=m
+        c[m]=$6
+        m++
+    }
+    print $1,$2,$3,t[1]"/"a[t[2]],$5 ($6 ? " " a2[$6] : "") >>"/dev/stderr"
+    next
+}
+# affix rule with a sixth field but no flags attached to the fourth field
+FILENAME ~ /\.aff$/ && /^[PS]FX/ && $6 {
+    if($6 && !a2[$6]){
+        a2[$6]=m
+        c[m]=$6
+        m++
+    }
+    # deprecated syntax
+    if($7 && !a[$7]){
+        a[$7]=n
+        b[n]=$7
+        n++
+    }
+    print $1,$2,$3,cutslash($4),$5, ($6 ? " " a2[$6] : "") ($7 ? " " a[$7] : "") >>"/dev/stderr"
+    next
+}
+# every other affix-file line is passed through unchanged
+FILENAME ~ /\.aff$/ { print $0 >>"/dev/stderr" }
+END{
+    if (n>1) {
+        print "AF", n-1 >>out
+        for(i=1;i<n;i++) print "AF",b[i],"#",i >>out
+    }
+    if (m>1) {
+        print "AM", m-1 >>out
+        for(i=1;i<m;i++) print "AM",c[i],"#",i >>out
+    }
+}' "$1" "$2" >"${DIC}_alias.dic" 2>"$TMP_AFF"
+# FLAG lines were already written first, so skip them here
+grep -v '^FLAG' "$TMP_AFF" >>"$OUT_AFF"
+echo "output: ${DIC}_alias.dic, ${AFF}_alias.aff"
+rm "$TMP_AFF"
Property changes on: trunk/dict/utils/hunmakealias
___________________________________________________________________
Name: svn:executable
+ *
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|