Notepad++ Python Script / Discussion / Script Showcase: Convert Urls to Hyperlinks

Convert Urls to Hyperlinks - by selection or whole document

Created: 2014-10-14

Updated: 2014-10-15

# Script name: http_urls_to_hyperlinks.py  By: Stan S. Created: 10-14-2014 with v.2.7.8 (60lines)
#
# Purpose: To convert urls to hyperlinks.
#          Urls in quotations such as urls already in hyperlinks are skipped.
#
# How to:  It works either by converting urls that are highlighted, selected,
#          or if there is no selection, 
#          it will convert urls in the whole document.
#          It will also convert consecutive urls that are not separated by whitespace.
#
# Example:
#
# http://example.com
#
# to
#
# <a href="http://example.com">http://example.com</a>
#

from Npp import *
import os

def tolink(s):
    """Return an HTML anchor that uses the url *s* as both its href and its text."""
    return ''.join(('<a href="', s, '">', s, '</a>'))

def tospace(s, sub, pos=0): #just to add a space to consecutive urls without whitespace
    """Break *s* apart in front of every occurrence of *sub*, space-terminated.

    Example: tospace('http://a.comhttp://b.com', 'http') returns
    'http://a.com http://b.com ' (note the trailing space after the last
    piece; callers strip it).

    s   -- the text to split, typically one whitespace-free token.
    sub -- the marker that starts a new piece, e.g. 'http'.
    pos -- index at which the search for *sub* starts; text before the
           first match found from there is kept as a piece of its own.

    No input text is ever dropped: text in front of the first match is kept
    as the first piece (it used to be discarded, which lost the first url of
    a token such as 'HTTP://a.comhttp://b.comhttp://c.com'), and a non-empty
    string without any match comes back as s + ' ' instead of ''.
    """
    # Index of every occurrence of sub at or after pos. Stepping by one
    # character keeps overlapping occurrences, like the original search did.
    cuts = []
    hit = s.find(sub, pos)
    while hit != -1:
        cuts.append(hit)
        hit = s.find(sub, hit + 1)

    pieces = []
    begin = 0
    for cut in cuts:
        if cut > begin:          # skip the empty piece in front of a match at index 0
            pieces.append(s[begin:cut])
        begin = cut
    if begin < len(s):           # remainder after the last cut (or all of s)
        pieces.append(s[begin:])

    return ''.join(piece + ' ' for piece in pieces)

def convert_urls_to_hyperlinks(): #selection or entire document
    """Convert the urls in the selection, or in the whole document, to hyperlinks.

    When some text is selected only the selection is rewritten; otherwise
    the complete document text is rewritten. A one-line status message is
    written to the console in both cases.

    The conversion itself is done by _linkify_text(), which works token by
    token and writes all whitespace back untouched. Compared with replacing
    'url + space' / 'url + os.linesep' across the whole text this means:
      * urls followed by a tab or by any kind of line ending are converted;
      * a url that merely occurs inside another token (for example inside
        href="...") is never touched;
      * trailing spaces of the selection / document are preserved.
    """
    selected = editor.getSelText()
    if selected != '':
        editor.replaceSel(_linkify_text(selected))
        console.write('Convert Urls in Selection to hyperlinks')
    else:
        converted = _linkify_text(editor.getText())
        # The original author found editor.replace() on the whole text
        # unreliable, so the buffer is cleared and refilled instead; both
        # steps are grouped into a single undo action.
        editor.beginUndoAction()
        try:
            editor.clearAll()
            editor.appendText(converted)
        finally:
            editor.endUndoAction()
        console.write('Convert Urls in entire document to hyperlinks')


def _split_glued_urls(token):
    """Return the list of urls in a whitespace-free *token* that starts with one."""
    second = token.find('http', 1)
    if second == -1:
        return [token]
    # The head is cut off here, so tospace() only ever sees text that starts
    # with 'http' and therefore returns every remaining url.
    return [token[:second]] + tospace(token[second:], 'http').split()


def _linkify_text(text):
    """Return *text* with every token that starts with 'http' (any case) linked."""
    import re  # local import: the header of this script only imports Npp and os

    # The capturing group makes re.split() keep the whitespace runs, so
    # spaces, tabs and line endings are reproduced exactly as they were.
    chunks = re.split(r'(\s+)', text)
    for index, chunk in enumerate(chunks):
        if chunk[:4].lower() == 'http':
            links = [tolink(url) for url in _split_glued_urls(chunk)]
            chunks[index] = ' '.join(links)   # glued urls end up space-separated
    return ''.join(chunks)

# Script entry point: run the conversion as soon as this file is executed.
convert_urls_to_hyperlinks() #selection or entire document


# Edit: 10-15-2014
#       Minor edit to allow upper and lowercase, i.e., case-insensitive.
#
# Comment:
#          Just as a note, I wanted to use the editor.replace() method when replacing text
#          in the whole document, but this line of text was not working.
#          http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com
#          (with a few newlines after the url on the previous line)
#          Don't know why; replacing a selection works, and the workaround
#          with clearAll + appendText does work for the whole document.
#
#          Any suggestions are welcomed, whether it's the editor.replace() method, or elsewhere.
#          The aim of the comment is to get it correct, and perhaps even reliable.
#          It can easily be made to convert ftp urls as well, e.g., with something like,
#          match = [ s for s in line.split() if ("http" == s[:4] or "ftp" == s[:3]) ]
#          Also, what's with Sourceforge's code highlighting?
#
#          Comments are eagerly accepted.
#          Thanks.

Last edit: Stan S 2014-10-15

# And here is the reverse, hyperlinks to urls.
#
# Script name: hyperlinks_to_urls.py  By: Stan S. Created: 10-15-2014 with v.2.7.8 (45lines)
#
# Purpose: To convert hyperlinks to urls.
#
# How to:  It works either by converting hyperlinks that are highlighted, selected,
#          or if there is no selection, 
#          it will convert hyperlinks, anchor text tags, in the whole document.
#
# Example:
#
# <a href="http://example.com">http://example.com</a>
#
# to
#
# http://example.com
#

from Npp import *
import os
import re

def convert_hyperlinks_to_urls():
    """Replace the hyperlinks in the selection, or the whole document, by their urls.

    When some text is selected only the selection is rewritten; otherwise
    the complete document text is rewritten. A one-line status message is
    written to the console in both cases.

    The replacement is done by _hyperlinks_to_urls(). It matches anchors
    with the DOTALL flag, so a closing tag may sit on a later line without
    the former newline-to-'<br>' placeholder; that placeholder turned every
    genuine '<br>' of the document into a line break when it was undone.
    """
    selected = editor.getSelText()
    if selected != '':
        editor.replaceSel(_hyperlinks_to_urls(selected))
        console.write('Convert hyperlinks in Selection to Urls' + '\n')
    else:
        converted = _hyperlinks_to_urls(editor.getText())
        # Clear and refill the buffer as one undo step.
        editor.beginUndoAction()
        try:
            editor.clearAll()
            editor.appendText(converted)
        finally:
            editor.endUndoAction()
        console.write('Convert hyperlinks in entire document to Urls' + '\n')


def _href_target(match):
    """Return the url for one matched anchor, or the anchor itself if it has none."""
    value = match.group(1).strip()      # everything between 'href=' and the closing tag
    if not value:
        return match.group(0)           # empty href: leave the anchor as it is

    quote = value[0]
    if quote == '"' or quote == "'":
        closing = value.find(quote, 1)  # skip the opening quote itself
        if closing == -1:
            console.write('Started with quote, did not end with a quote or the same type of quote - malformed href url address' + '\n')
            return match.group(0)
        return value[1:closing]

    # Unquoted href: the url ends at the first whitespace or at the '>' that
    # closes the opening tag, so the link text is not dragged along.
    bare = re.match(r'[^\s>]*', value).group(0)
    return bare if bare else match.group(0)


def _hyperlinks_to_urls(text):
    """Return *text* with every '<a href=...>...</a>' replaced by its href url."""
    # Case-insensitive; optional whitespace inside the tags; re.S lets the
    # lazy '.*?' run across line breaks up to the first closing tag.
    anchor = re.compile(r'<a\s*href\s*=(.*?)<\s*/\s*a\s*>', re.I | re.S)
    return anchor.sub(_href_target, text)

# Script entry point: run the conversion as soon as this file is executed.
convert_hyperlinks_to_urls()


# Edit: 10-15-2014
#       Closing tag can now be on different line than opening tag with newline to <br> workaround.
#       Spaces can now be optional or unlimited using  \s{0,}
#
# Comment:
#          Initially wanted to go with option re.M but 
#          could only get \n on immediate nextline,
#          not ad infinitum to end of document, 
#          if anyone has any suggestions it would be warmly welcomed.
#          Anyway, the newline to <br> and back again workaround 
#          works well enough to not have to wrestle with regex
#          too much.
#
#          I was going for broad reg ex, 
#          modifications would be greatly appreciated.
#
#          Python Script is a great Notepad++ plugin that made this possible.
#          Thanks.

Last edit: Stan S 2014-10-16

Convert Urls to Hyperlinks - by selection or whole document

A Python Scripting plugin for Notepad++

Forums

Help

Convert Urls to Hyperlinks - by selection or whole document

Convert Urls to Hyperlinks - by selection or whole document

A Python Scripting plugin for Notepad++

Forums

Help

Convert Urls to Hyperlinks - by selection or whole document

Convert Urls to Hyperlinks - by selection or whole document