Menu

Sorry for silence... still here...

Soren Bro
2019-03-25
2019-03-25
  • Soren Bro

    Soren Bro - 2019-03-25

    I'm trying to write what may be the ugliest REGEX known to man in order to
    harvest data from a wiki page. I refuse to do it by manually...

    [CODE]
    if( $_=~ /<img alt=\"(.?)\" src=\"(.?)\" decoding=\".?\" width=\".?\"
    height=\".?\" class=\".?\" srcset=\"(.?)\" data-file-width=\".?\"
    data-file-height=\".?\"\s /) {
    print "\tLINE: [$1], [$2], [$3], \n";
    [/CODE]

    and I'm not nearly done. Please don't ask. await the result....

     
    • Soren Bro

      Soren Bro - 2019-03-25

      I know gow much you love this DB stuff....:

      [CODE]
      -- Create HERMES DATABASE
      DROP DATABASE HERMES;
      CREATE DATABASE HERMES;
      USE HERMES;

      --

      -- Create normalization tables.

      -- Normalization suppport for names.
      DROP TABLE _NAMES;
      CREATE TABLE _NAMES (ID INT NOT NULL AUTO_INCREMENT PRIMARY KEY, NAME
      VARCHAR(50));

      -- Normalization suppport for email nicks.
      DROP TABLE _NICKS;
      CREATE TABLE _NICKS (ID INT NOT NULL AUTO_INCREMENT PRIMARY KEY, NICK
      VARCHAR(50));

      -- Normalization suppport for email servers.
      DROP TABLE _SERVERS;
      CREATE TABLE _SERVERS (ID INT NOT NULL AUTO_INCREMENT PRIMARY KEY, SERVER
      VARCHAR(50));

      -- Normalization suppport for email address extensions.
      -- This is gonna go
      DROP TABLE _EXT;
      CREATE TABLE _EXTS (ID INT NOT NULL AUTO_INCREMENT PRIMARY KEY, EXT
      VARCHAR(50));

      DROP TABLE _SOVEREIGNTIES;
      CREATE TABLE _SOVEREIGNTIES (ID INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
      TYPE VARCHAR(80) UNIQUE);
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'UN member state');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'Finland');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'United States');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'United Kingdom');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'Antartic Treaty');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'Netherlands');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'Norway');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'Australia');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'New Zealand');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'Denmark');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'France');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'British Crown');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'UN observer');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'China');
      INSERT INTO _SOVEREIGNTIES (ID, TYPE) VALUES (NULL, 'Disputed');

      -- Normalization suppport for countries.
      -- IDEA: Some kind of banner (Which ofcourse can be turned off and on) fed
      by an RSS with exchange rates and/or stock values.
      -- I'm assuming that the people using this program is interested in making
      money. One way or the other...
      DROP TABLE _COUNTRIES;
      CREATE TABLE _COUNTRIES ( \ ID INT NOT NULL AUTO_INCREMENT PRIMARY KEY, \ FLAG TINYBLOB, \ NAME CHAR(70) UNIQUE NOT NULL, \ OFFICIAL CHAR(100) UNIQUE NOT NULL, \ SOV_ID INT, FOREIGN KEY (SOV_ID) REFERENCES _SOVEREIGNTIES(ID), \ ALPHA2 CHAR(2) UNIQUE NOT NULL, \ ALPHA3 CHAR(3) UNIQUE NOT NULL, \ NUMCODE CHAR(4), \ SDCODELINK CHAR(30), \ TLD CHAR(10) \ );

      -- Proof of concept here... This wil be created autmatically by a PERL
      script. I'm not manually typing every country on the planet in.
      -- TLD will replace the normalization table _EXT. If the address extension
      is not int this column (think: IF EXT NOT IN (SELECT TLD FROM _COUNTRIES)
      RAISE ERROR)

      INSERT INTO _COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './Flags/35px-Flag_of_Afghanistan.svg.png', 'Afghanistan', 'The
      Islamic Republic of Afghanistan', 1, 'AF', 'AFG', '004', 'ISO 3166-2:AF',
      'af');

      INSERT INTO _COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './Flags/35px-Flag_of_Åland.svg.png', 'Åland Islands', 'Åland', 2,
      'AX', 'ALA', '248', 'ISO 3166-2:AX', 'ax');

      INSERT INTO _COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './32px-Flag_of_Albania.svg.png', 'Albania', 'The Repubilc of
      Albania', 1, 'AL', 'ALB', '008', 'ISO 3166-2:AL', 'al');

      INSERT INTO _COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './35px-Flag_of_Algeria.svg.png', 'Algeria', 'The People\'s
      Democratic Republic of Algeria', 1, 'DZ', 'DZA', '012', 'ISO 3166-2:DZ',
      'dz');

      INSERT INTO _COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './35px-Flag_of_American_Samoa.svg.png', 'American Samoa', 'The
      Territory of American Samoa', 3, 'AS', 'ASM', '016', 'ISO 3166-2:AS', 'as');

      INSERT INTO _COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './33px-Flag_of_Andorra.svg.png', 'Andorra', 'The Principality of
      Andorra', 1, 'AD', 'AND', '020', 'ISO 3166-2:AD', 'ad');

      INSERT INTO _COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './35px-Flag_of_Angola.svg.png', 'Angola', 'The Republic of Angola',
      1, 'AO', 'AGO', '024', 'ISO 3166-2:AO', 'ao');

      INSERT INTO _COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './35px-Flag_of_Anguilla.svg.png', 'Anguilla', 'Anguilla', 4, 'AI',
      'AIA', '660', 'ISO 3166-2:AI', 'ai');

      INSERT INTO COUNTRIES (ID, FLAG, NAME, OFFICIAL, SOV_ID, ALPHA2, ALPHA3,
      NUMCODE, SDCODELINK, TLD) VALUES \ (NULL, './35px-Proposed_flag_of_Antarctica
      (Graham_Bartram).svg.png',
      'Antarctica', 'All land and ice shelves south of the 60th parallel south',
      5, 'AQ', 'ATA', '010', 'ISO 3166-2:AQ', 'aq');


      -- Background table. We will provide a VIEW of this table instead.
      -- Again EXT will replaced by _COUNTRIES.TLD
      DROP TABLE _CONTACTS;
      CREATE TABLE _CONTACTS ( \ ID INT NOT NULL AUTO_INCREMENT PRIMARY KEY, \ NICK_ID INT, FOREIGN KEY (NICK_ID) REFERENCES _NICKS(ID), \ SERVER_ID INT, FOREIGN KEY (SERVER_ID) REFERENCES _SERVERS(ID), \ EXT_ID INT, FOREIGN KEY (EXT_ID) REFERENCES _EXTS(ID), \ NAME1_ID INT, FOREIGN KEY (NAME1_ID) REFERENCES _NAMES(ID), \ NAME2_ID INT, FOREIGN KEY (NAME3_ID) REFERENCES _NAMES(ID), \ NAME3_ID INT, FOREIGN KEY (NAME4_ID) REFERENCES _NAMES(ID), \ ADDRESS1 VARCHAR(50), \ ADDRESS2 VARCHAR(50), \ DESIGNATION CHAR(10), \ POSTALNO CHAR(20),
      COUNTRY_ID INT FOREIGN KEY (COUNTRY_ID) REFERENCES _COUTRIES(ID), \ PHONE1 CHAR(30), \ PHONE2 CHAR(30) \ );


      -- Create procedure for adding a CONTACT.
      -- As I mentioneed my SQL is somewhat rusty (and probably never superb to
      begin with). If anyone has any ideas or guides as to
      -- how this could be done more correct, compact or robust I'm all ears...
      -- Especially the wordd split I'm not completelt satisfied with, As with
      inserts, I'm sure there's a smarter wat, this just seemed
      -- somewhat robust to begin with.
      -- I changed the work variable called "WORK", as this seemed to be a
      language keyword.
      -- Again EXT will replaced by _COUNTRIES.TLD
      DROP PROCEDURE AddContact;
      DELIMITER |
      CREATE PROCEDURE AddContact(IN EMAIL VARCHAR(50))
      BEGIN
      DECLARE W VARCHAR(50);
      DECLARE WORKNICK VARCHAR(50);
      DECLARE NICK_ID INT;
      DECLARE WORKSERVER VARCHAR(50);
      DECLARE SERVER_ID INT;
      DECLARE WORKEXT VARCHAR(50);
      DECLARE EXT_ID INT;
      SELECT TRIM(EMAIL) INTO W;
      SELECT SUBSTRING_INDEX(W, '@', 1) INTO WORKNICK;
      SELECT SUBSTRING_INDEX(W, '@', -1) INTO WORKSERVER;
      SELECT SUBSTRING_INDEX(W, '.', -1) INTO WORKEXT;

      SELECT ID FROM _NICKS WHERE NICK = WORKNICK INTO NICK_ID;
      IF(NICK_ID IS NULL)
      THEN
      INSERT INTO _NICKS (ID, NICK) VALUES (NULL, WORKNICK);
      SELECT LAST_INSERT_ID() INTO NICK_ID;
      END IF;

      SELECT ID FROM _SERVERS WHERE SERVER = WORKSERVER INTO SERVER_ID;
      IF(SERVER_ID IS NULL)
      THEN
      INSERT INTO _SERVERS (ID, SERVER) VALUES (NULL, WORKSERVER);
      SELECT LAST_INSERT_ID() INTO SERVER_ID;
      END IF;

      SELECT ID FROM _EXTS WHERE EXT = WORKEXT INTO EXT_ID;
      IF(EXT_ID IS NULL)
      THEN
      INSERT INTO _EXTS (ID, EXT) VALUES (NULL, WORKEXT);
      SELECT LAST_INSERT_ID() INTO EXT_ID;
      END IF;

      INSERT INTO _CONTACTS (ID, NICK_ID, SERVER_ID, EXT_ID) VALUES (NICK_ID,
      SERVER_ID, EXT_ID, NULL);

      END |

      DELIMITER ;


      -- Create a contact.
      CALL AddContact(' sbrothy@gmail.com ');


      -- Create VIEW CONTACTS
      DROP VIEW CONTACTS;
      CREATE VIEW CONTACTS AS SELECT (CONCAT(NICK, SERVER, EXT) AS (EMAIL))
      FROM _CONTACTS AS _C
      INNER JOIN _NICKS AS _N ON _C.ID = _N.ID
      INNER JOIN _SERVERS AS _S ON _C.ID = _S.ID
      INNER JOIN _EXTS AS _E ON _C.ID = _E.ID;


      SELECT * FROM CONTACTS;


      --The MESSAGES tables is a lor more complicated. Perhaps with a composite
      PRIMARY KEY. The pattern will be the same though, It may have to referencve
      itself buy I'll make that work!

      [/CODE]

      On Mon, Mar 25, 2019 at 7:19 PM Soren Bro sbrothy@users.sourceforge.net
      wrote:

      I'm trying to write what may be the ugliest REGEX known to man in order to
      harvest data from a wiki page. I refuse to do it by manually...

      [CODE]
      if( $_=~ /<img alt=\"(.?)\" src=\"(.?)\" decoding=\".?\" width=\".?\"
      height=\".?\" class=\".?\" srcset=\"(.?)\" data-file-width=\".?\"
      data-file-height=\".?\"\s /) {
      print "\tLINE: [$1], [$2], [$3], \n";
      [/CODE]

      and I'm not nearly done. Please don't ask. await the result....

      Sorry for silence... still here...
      https://sourceforge.net/p/hermesmail/discussion/general/thread/0491a4468e/?limit=25#e094


      Sent from sourceforge.net because you indicated interest in
      https://sourceforge.net/p/hermesmail/discussion/general/

      To unsubscribe from further messages, please visit
      https://sourceforge.net/auth/subscriptions/

       

Log in to post a comment.

Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.