-tinytm_define_context(id, next, previous)
adds context to segment
-tinytm_generate_hash(id) -- problems with int overflow - not overflowing as in Java.. Use importer function to generate hash
generates hash code for segment with id
-tinytm_generate_hash_all()
generates hash code for all segments in tiny_tm -- uses tinytm_generate_hash()
-tinytm_hash_search(int hash) -- ??private??
searches 100% matches based on hash code
-tinytm_industry_search(varchar industry_id) -- ??private??
narrows searches by filtering the segments by industry
-tinytm_context_search(int central_hash, int next_context_hash, int previous_context_hash) -- private
search 100% matches by hash and by 100% match of neighboring segments
-tinytm_get_fuzzy_matches_by(source_lang, target_lang, source_text, method) -- public interface to all 3 search methods
fuzzy search using all added methods
*method one of "hash", "combined", "contextfree",
combined = narrow by industry, find by hash and check context; if there are no results, fall back to fuzzy matching narrowed by industry
contextfree = exact matches without context search narrowed by industry
hash = exact across all industries without context search
helper functions:
-tinytm_define_context(int segment_id, varchar next_segment_key, varchar previous_segment_key)
the segments must have the same context_group_id to create the context relation between them
if a segment has no left or right context (it is at the start or end of a paragraph), its previous or next id is -1;
a segment with id -1 is added to the segments table so that the foreign-key constraint is not violated. This is not the same as the id being NULL:
NULL means no context has been defined; -1 means the context is defined as non-existent.
-tinytm_get_industry_id(varchar industry name, bool createNew)
searches for industry id, or creates new industry if not found and second parameter is true
-tinytm_create_industry(varchar name, varchar comment)
creates industry
new tables - tinytm_industries, tinytm_placeholder_strategies
-- Table: tinytm_industries
-- Lookup table of industries. tinytm_segments.industry_id references id.
-- Column order is kept as-is because it is visible to SELECT * and to
-- INSERT statements that omit the column list.
-- NOTE(review): this table is assigned to role "tinytm", while the other
-- tables in this proposal are assigned to "postgres" -- confirm which
-- owner is intended.
-- DROP TABLE tinytm_industries;
CREATE TABLE tinytm_industries (
    name      varchar(80),
    "comment" text,
    id        smallint NOT NULL,
    CONSTRAINT id
        PRIMARY KEY (id)
) WITHOUT OIDS;

ALTER TABLE tinytm_industries
    OWNER TO tinytm;
-- Table: tinytm_placeholder_strategies
-- Lookup table of placeholder strategies.
-- tinytm_segments.placeholder_strategy_id references id.
CREATE TABLE tinytm_placeholder_strategies (
    id         smallint     NOT NULL,
    name       varchar(100) NOT NULL,
    definition text,
    CONSTRAINT placeholder_strat_id
        PRIMARY KEY (id)
) WITHOUT OIDS;

ALTER TABLE tinytm_placeholder_strategies
    OWNER TO postgres;

COMMENT ON TABLE tinytm_placeholder_strategies
    IS 'definitions of placeholder strategy used on current segment';
changed tables - tinytm_segments
-- Table: tinytm_segments
-- One row per segment: a source/target text pair together with its
-- languages, tagging data, industry, hash and links to the neighbouring
-- segments (previous / "next").
--
-- Type spellings use the PostgreSQL aliases (varchar = character varying,
-- timestamptz = timestamp with time zone). Foreign keys rely on the default
-- match type (MATCH SIMPLE), so it is not spelled out.
CREATE TABLE tinytm_segments (
    segment_id              integer       NOT NULL,
    segment_key             varchar(100),
    parent_id               integer,
    owner_id                integer       NOT NULL,
    creation_date           timestamptz   NOT NULL,
    creation_ip             varchar(50)   NOT NULL,
    -- NOTE(review): no foreign key is declared for customer_id in this
    -- definition -- confirm whether one is intended.
    customer_id             integer,
    segment_type_id         integer       NOT NULL,
    text_type               varchar(50),
    document_key            varchar(1000),
    subject_area_id         integer,
    source_lang_id          integer       NOT NULL,
    target_lang_id          integer       NOT NULL,
    tags                    text,
    source_text             text          NOT NULL,
    target_text             text          NOT NULL,
    tagged_source_text      text,
    tagged_target_text      text,
    source_tmx_tags         text,
    target_tmx_tags         text,
    source_tag_ids          varchar(255)[],
    target_tag_ids          varchar(255)[],
    industry_id             smallint,
    segment_hash            bigint,
    -- Context links. -1 means "this segment has no previous/next segment"
    -- (matches the COMMENT ON COLUMN statements below); NULL means no
    -- context has been defined at all. Because both columns are foreign
    -- keys onto this table, a row with segment_id = -1 has to exist.
    previous                integer,
    "next"                  integer,
    placeholder_strategy_id smallint,
    context_group_id        varchar(64)   DEFAULT '_NOGROUP_'::varchar,

    CONSTRAINT tinytm_segment_pk
        PRIMARY KEY (segment_id),

    -- Lookup tables introduced by this proposal.
    CONSTRAINT fki_strategy
        FOREIGN KEY (placeholder_strategy_id)
        REFERENCES tinytm_placeholder_strategies (id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT industry_id
        FOREIGN KEY (industry_id)
        REFERENCES tinytm_industries (id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,

    -- Self-references: context neighbours and parent segment.
    CONSTRAINT "next"
        FOREIGN KEY ("next")
        REFERENCES tinytm_segments (segment_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT previous
        FOREIGN KEY (previous)
        REFERENCES tinytm_segments (segment_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_segment_parent_fk
        FOREIGN KEY (parent_id)
        REFERENCES tinytm_segments (segment_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,

    -- References to tables defined outside this proposal.
    CONSTRAINT tinytm_segment_type_fk
        FOREIGN KEY (segment_type_id)
        REFERENCES tinytm_segment_types (segment_type_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_segments_creation_user_fk
        FOREIGN KEY (owner_id)
        REFERENCES tinytm_users (user_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_source_lang_fk
        FOREIGN KEY (source_lang_id)
        REFERENCES tinytm_languages (language_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_subject_area_fk
        FOREIGN KEY (subject_area_id)
        REFERENCES tinytm_subject_areas (subject_area_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_target_lang_fk
        FOREIGN KEY (target_lang_id)
        REFERENCES tinytm_languages (language_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION
) WITHOUT OIDS;

ALTER TABLE tinytm_segments
    OWNER TO postgres;

COMMENT ON COLUMN tinytm_segments.previous
    IS 'if the id is the -1 it has no previous segment';
COMMENT ON COLUMN tinytm_segments."next"
    IS 'if the id is the -1 it has no next segment';
-- Indexes on tinytm_segments.
-- The fki_* indexes cover the foreign-key columns industry_id, "next",
-- previous and placeholder_strategy_id; the remaining ones support lookups
-- by id, by hash code and by source text.

-- Index: fki_industry_id
-- DROP INDEX fki_industry_id;
CREATE INDEX fki_industry_id
    ON tinytm_segments
    USING btree (industry_id);

-- Index: fki_next
-- DROP INDEX fki_next;
CREATE INDEX fki_next
    ON tinytm_segments
    USING btree ("next");

-- Index: fki_previous
-- DROP INDEX fki_previous;
CREATE INDEX fki_previous
    ON tinytm_segments
    USING btree (previous);

-- Index: fki_strategy
-- DROP INDEX fki_strategy;
CREATE INDEX fki_strategy
    ON tinytm_segments
    USING btree (placeholder_strategy_id);

-- Index: id_idx
-- NOTE(review): segment_id is already the primary key (tinytm_segment_pk),
-- and PostgreSQL backs a primary key with its own unique index, so this
-- index duplicates it. It is kept so that existing references to the name
-- id_idx keep working; consider dropping it once nothing depends on it.
-- DROP INDEX id_idx;
CREATE UNIQUE INDEX id_idx
    ON tinytm_segments
    USING btree (segment_id);

-- Index: segment_hash_idx
-- Added: segment_hash is the key used for exact-match lookups by hash code
-- (tinytm_hash_search / tinytm_context_search in this proposal), but no
-- index covered it, so every such lookup would scan the whole table.
-- DROP INDEX segment_hash_idx;
CREATE INDEX segment_hash_idx
    ON tinytm_segments
    USING btree (segment_hash);

-- Index: source_hash
-- Hash index on the raw source text; hash indexes serve equality
-- comparisons only.
-- NOTE(review): on PostgreSQL releases before 10, hash indexes are not
-- WAL-logged (not crash-safe, not replicated) -- confirm the target version.
-- DROP INDEX source_hash;
CREATE INDEX source_hash
    ON tinytm_segments
    USING hash (source_text);

-- Index: source_trgm_idx
-- Trigram index on the source text. gist_trgm_ops is the GiST operator
-- class provided by the pg_trgm module, which must be installed first.
-- DROP INDEX source_trgm_idx;
CREATE INDEX source_trgm_idx
    ON tinytm_segments
    USING gist (source_text gist_trgm_ops);
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
In next release cycle:
As for the other functions going for the protocol freeze, I plan to add these:
-tinytm_formating_search()
fuzzy search considering format tags using penalties for unmatched format tags
-tinytm_set_option()
sets option to a value, used for penalties and other options
-tinytm_trigram_search()
search optimised by trigram search and fulltext GiST indexes, final score measured by Levenshtein distance on the first X candidates
Also some other functions that would be used for CRUD (add, get_id_from_name, delete, update) on industry, customer, language, placeholder and other tables, but these are unnecessary now; they can be implemented later, or the tables can be manipulated manually through SQL or some SQL utilities.
Also, the installation .sql files need to be updated.
PLEASE COMMENT ON THESE FUNCTIONS.
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
A request for comments on my protocol proposal. All these functions are implemented and being tested right now (with lots of errors still).
NEW Functions: currently being implemented
-tinytm_new_segment(original_parameters... , tagged_source_tagged_text, tagged_target_text, source_tmx_text, target_tmx_text, source_tag_ids, target_tag_ids)
-tinytm_new_segment(original_parameters... , tagged_source_tagged_text, tagged_target_text, source_tmx_text, target_tmx_text, source_tag_ids, target_tag_ids, industry_name, next_id, previous_id, context_group_id,segment_hash, ph_strat_id )
-tinytm_define_context(id, next, previous)
adds context to segment
-tinytm_generate_hash(id) -- problems with int overflow - not overflowing as in Java.. Use importer function to generate hash
generates hash code for segment with id
-tinytm_generate_hash_all()
generates hash code for all segments in tiny_tm -- uses tinytm_generate_hash()
-tinytm_hash_search(int hash) -- ??private??
searches 100% matches based on hash code
-tinytm_industry_search(varchar industry_id) -- ??private??
narrows searches by filtering the segments by industry
-tinytm_context_search(int central_hash, int next_context_hash, int previous_context_hash) -- private
search 100% matches by hash and by 100% match of neighboring segments
-tinytm_get_fuzzy_matches_by(source_lang, target_lang, source_text, method) -- public interface to all 3 search methods
fuzzy search using all added methods
*method one of "hash", "combined", "contextfree",
combined = narrow by industry, find by hash and check context; if there are no results, fall back to fuzzy matching narrowed by industry
contextfree = exact matches without context search narrowed by industry
hash = exact across all industries without context search
helper functions:
-tinytm_define_context(int segment_id, varchar next_segment_key, varchar previous_segment_key)
the segments must have the same context_group_id to create the context relation between them
if a segment has no left or right context (it is at the start or end of a paragraph), its previous or next id is -1;
a segment with id -1 is added to the segments table so that the foreign-key constraint is not violated. This is not the same as the id being NULL:
NULL means no context has been defined; -1 means the context is defined as non-existent.
-tinytm_get_industry_id(varchar industry name, bool createNew)
searches for industry id, or creates new industry if not found and second parameter is true
-tinytm_create_industry(varchar name, varchar comment)
creates industry
new tables - tinytm_industries, tinytm_placeholder_strategies
-- Table: tinytm_industries
-- Lookup table of industries. tinytm_segments.industry_id references id.
-- Column order is kept as-is because it is visible to SELECT * and to
-- INSERT statements that omit the column list.
-- NOTE(review): this table is assigned to role "tinytm", while the other
-- tables in this proposal are assigned to "postgres" -- confirm which
-- owner is intended.
-- DROP TABLE tinytm_industries;
CREATE TABLE tinytm_industries (
    name      varchar(80),
    "comment" text,
    id        smallint NOT NULL,
    CONSTRAINT id
        PRIMARY KEY (id)
) WITHOUT OIDS;

ALTER TABLE tinytm_industries
    OWNER TO tinytm;
-- Table: tinytm_placeholder_strategies
-- Lookup table of placeholder strategies.
-- tinytm_segments.placeholder_strategy_id references id.
CREATE TABLE tinytm_placeholder_strategies (
    id         smallint     NOT NULL,
    name       varchar(100) NOT NULL,
    definition text,
    CONSTRAINT placeholder_strat_id
        PRIMARY KEY (id)
) WITHOUT OIDS;

ALTER TABLE tinytm_placeholder_strategies
    OWNER TO postgres;

COMMENT ON TABLE tinytm_placeholder_strategies
    IS 'definitions of placeholder strategy used on current segment';
changed tables - tinytm_segments
-- Table: tinytm_segments
-- One row per segment: a source/target text pair together with its
-- languages, tagging data, industry, hash and links to the neighbouring
-- segments (previous / "next").
--
-- Type spellings use the PostgreSQL aliases (varchar = character varying,
-- timestamptz = timestamp with time zone). Foreign keys rely on the default
-- match type (MATCH SIMPLE), so it is not spelled out.
CREATE TABLE tinytm_segments (
    segment_id              integer       NOT NULL,
    segment_key             varchar(100),
    parent_id               integer,
    owner_id                integer       NOT NULL,
    creation_date           timestamptz   NOT NULL,
    creation_ip             varchar(50)   NOT NULL,
    -- NOTE(review): no foreign key is declared for customer_id in this
    -- definition -- confirm whether one is intended.
    customer_id             integer,
    segment_type_id         integer       NOT NULL,
    text_type               varchar(50),
    document_key            varchar(1000),
    subject_area_id         integer,
    source_lang_id          integer       NOT NULL,
    target_lang_id          integer       NOT NULL,
    tags                    text,
    source_text             text          NOT NULL,
    target_text             text          NOT NULL,
    tagged_source_text      text,
    tagged_target_text      text,
    source_tmx_tags         text,
    target_tmx_tags         text,
    source_tag_ids          varchar(255)[],
    target_tag_ids          varchar(255)[],
    industry_id             smallint,
    segment_hash            bigint,
    -- Context links. -1 means "this segment has no previous/next segment"
    -- (matches the COMMENT ON COLUMN statements below); NULL means no
    -- context has been defined at all. Because both columns are foreign
    -- keys onto this table, a row with segment_id = -1 has to exist.
    previous                integer,
    "next"                  integer,
    placeholder_strategy_id smallint,
    context_group_id        varchar(64)   DEFAULT '_NOGROUP_'::varchar,

    CONSTRAINT tinytm_segment_pk
        PRIMARY KEY (segment_id),

    -- Lookup tables introduced by this proposal.
    CONSTRAINT fki_strategy
        FOREIGN KEY (placeholder_strategy_id)
        REFERENCES tinytm_placeholder_strategies (id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT industry_id
        FOREIGN KEY (industry_id)
        REFERENCES tinytm_industries (id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,

    -- Self-references: context neighbours and parent segment.
    CONSTRAINT "next"
        FOREIGN KEY ("next")
        REFERENCES tinytm_segments (segment_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT previous
        FOREIGN KEY (previous)
        REFERENCES tinytm_segments (segment_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_segment_parent_fk
        FOREIGN KEY (parent_id)
        REFERENCES tinytm_segments (segment_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,

    -- References to tables defined outside this proposal.
    CONSTRAINT tinytm_segment_type_fk
        FOREIGN KEY (segment_type_id)
        REFERENCES tinytm_segment_types (segment_type_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_segments_creation_user_fk
        FOREIGN KEY (owner_id)
        REFERENCES tinytm_users (user_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_source_lang_fk
        FOREIGN KEY (source_lang_id)
        REFERENCES tinytm_languages (language_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_subject_area_fk
        FOREIGN KEY (subject_area_id)
        REFERENCES tinytm_subject_areas (subject_area_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION,
    CONSTRAINT tinytm_target_lang_fk
        FOREIGN KEY (target_lang_id)
        REFERENCES tinytm_languages (language_id)
        ON UPDATE NO ACTION ON DELETE NO ACTION
) WITHOUT OIDS;

ALTER TABLE tinytm_segments
    OWNER TO postgres;

COMMENT ON COLUMN tinytm_segments.previous
    IS 'if the id is the -1 it has no previous segment';
COMMENT ON COLUMN tinytm_segments."next"
    IS 'if the id is the -1 it has no next segment';
-- Indexes on tinytm_segments.
-- The fki_* indexes cover the foreign-key columns industry_id, "next",
-- previous and placeholder_strategy_id; the remaining ones support lookups
-- by id, by hash code and by source text.

-- Index: fki_industry_id
-- DROP INDEX fki_industry_id;
CREATE INDEX fki_industry_id
    ON tinytm_segments
    USING btree (industry_id);

-- Index: fki_next
-- DROP INDEX fki_next;
CREATE INDEX fki_next
    ON tinytm_segments
    USING btree ("next");

-- Index: fki_previous
-- DROP INDEX fki_previous;
CREATE INDEX fki_previous
    ON tinytm_segments
    USING btree (previous);

-- Index: fki_strategy
-- DROP INDEX fki_strategy;
CREATE INDEX fki_strategy
    ON tinytm_segments
    USING btree (placeholder_strategy_id);

-- Index: id_idx
-- NOTE(review): segment_id is already the primary key (tinytm_segment_pk),
-- and PostgreSQL backs a primary key with its own unique index, so this
-- index duplicates it. It is kept so that existing references to the name
-- id_idx keep working; consider dropping it once nothing depends on it.
-- DROP INDEX id_idx;
CREATE UNIQUE INDEX id_idx
    ON tinytm_segments
    USING btree (segment_id);

-- Index: segment_hash_idx
-- Added: segment_hash is the key used for exact-match lookups by hash code
-- (tinytm_hash_search / tinytm_context_search in this proposal), but no
-- index covered it, so every such lookup would scan the whole table.
-- DROP INDEX segment_hash_idx;
CREATE INDEX segment_hash_idx
    ON tinytm_segments
    USING btree (segment_hash);

-- Index: source_hash
-- Hash index on the raw source text; hash indexes serve equality
-- comparisons only.
-- NOTE(review): on PostgreSQL releases before 10, hash indexes are not
-- WAL-logged (not crash-safe, not replicated) -- confirm the target version.
-- DROP INDEX source_hash;
CREATE INDEX source_hash
    ON tinytm_segments
    USING hash (source_text);

-- Index: source_trgm_idx
-- Trigram index on the source text. gist_trgm_ops is the GiST operator
-- class provided by the pg_trgm module, which must be installed first.
-- DROP INDEX source_trgm_idx;
CREATE INDEX source_trgm_idx
    ON tinytm_segments
    USING gist (source_text gist_trgm_ops);
In next release cycle:
As for the other functions going for the protocol freeze, I plan to add these:
-tinytm_formating_search()
fuzzy search considering format tags using penalties for unmatched format tags
-tinytm_set_option()
sets option to a value, used for penalties and other options
-tinytm_trigram_search()
search optimised by trigram search and fulltext GiST indexes, final score measured by Levenshtein distance on the first X candidates
Also some other functions that would be used for CRUD (add, get_id_from_name, delete, update) on industry, customer, language, placeholder and other tables, but these are unnecessary now; they can be implemented later, or the tables can be manipulated manually through SQL or some SQL utilities.
Also, the installation .sql files need to be updated.
PLEASE COMMENT ON THESE FUNCTIONS.