I used chinese in article name. When executed MediaWikiOpen to browse the mediawiki search page,the search words in some error codes.
I used google to find the answer how to encode unicode in url and it work .
this is the code
IExplorer MW_SearchAddress & URLEncodeUTF8(DocInfo.ArticleName)
'UTF8 URLEncode
Public Function URLEncodeUTF8(ByVal s)
Dim i, k
Dim sl
Dim c
Dim uni
Dim tp, h1, h2, h3
sl = Len(s)
tp = ""
k = 0
For i = 1 To sl
c = Mid(s, i, 1)
uni = AscW(c)
If uni < 0 Then uni = uni + 65536
If uni < 128 Then
tp = tp & c
k = k + 1
ElseIf uni < 2048 Then
k = k + 2
h2 = "%" & Hex(&H80 Xor (uni And &H3F))
uni = uni \ (2 ^ 6)
h1 = "%" & Hex(&HC0 Xor (uni And &H1F))
tp = tp & h1 & h2
Else
h3 = "%" & Hex(&H80 Xor (uni And &H3F))
uni = uni \ (2 ^ 6)
h2 = "%" & Hex(&H80 Xor (uni And &H3F))
uni = uni \ (2 ^ 6)
h1 = "%" & Hex(&HE0 Xor (uni And &HF))
tp = tp & h1 & h2 & h3
End If
Next
URLEncodeUTF8 = tp
End Function
'A-Fa-f0-9 Byte
Public Function isxdigit(c)
isxdigit = CBool((c >= 48 And c <= 57) Or (c >= 65 And c <= 70) Or (c >= 97 And c <= 102))
End Function
Public Function isascii(c)
isascii = CBool(c > 0 And c < 128)
End Function
Public Function IsUTF8Body(ByVal u)
IsUTF8Body = CBool(u >= &H80 And u <= &HBF)
End Function
Private Function UTF8Byte(ByVal u)
If u > &H0 And u <= &H7F Then
UTF8Byte = 1
ElseIf u >= &HC0 And u <= &HDF Then
UTF8Byte = 2
ElseIf u >= &HE0 And u <= &HEF Then
UTF8Byte = 3
ElseIf u >= &HF0 And u <= &HF7 Then
UTF8Byte = 4
ElseIf u >= &HF8 And u <= &HFB Then
UTF8Byte = 5
ElseIf u >= &HFC And u <= &HFD Then
UTF8Byte = 6
Else
UTF8Byte = 0
End If
End Function
Private Function UTF8Test(ByVal u1, ByVal u2, ByVal u3)
UTF8Test = False
If CBool(u1 >= &HC0 And u1 <= &HFD) Then
UTF8Test = CBool(IsUTF8Body(u2) And IsUTF8Body(u3))
End If
End Function
Private Function ishex(s)
ishex = False
If Len(s) < 2 Then Exit Function
If isxdigit(Asc(Mid(s, 1, 1))) = False Then Exit Function
If isxdigit(Asc(Mid(s, 2, 1))) = False Then Exit Function
ishex = True
End Function
Private Function isescape(s)
isescape = False
If Len(s) < 5 Then Exit Function
If UCase(Mid(s, 1, 1)) <> "U" Then Exit Function
If isxdigit(Asc(Mid(s, 2, 1))) = False Then Exit Function
If isxdigit(Asc(Mid(s, 3, 1))) = False Then Exit Function
If isxdigit(Asc(Mid(s, 4, 1))) = False Then Exit Function
If isxdigit(Asc(Mid(s, 5, 1))) = False Then Exit Function
isescape = True
End Function
Private Function AscX(s)
AscX = CInt("&H" & Mid(s, 1, 2))
End Function
Public Function URLDecodeUTF8(s)
Dim tp
Dim i
Dim tl
Dim pp
Dim a, b, c
Dim h
URLDecodeUTF8 = ""
tp = Split(Replace(s, "+", " "), "%")
tl = UBound(tp)
If tl = -1 Then Exit Function
pp = tp(0)
For i = 1 To tl
If isescape(tp(i)) Then
pp = pp & ChrW("&H" & Mid(tp(i), 2, 4)) & Mid(tp(i), 6)
ElseIf ishex(tp(i)) = False Then
pp = pp & tp(i)
Else
a = AscX(tp(i))
If isascii(a) = False And Len(tp(i)) = 2 Then
If (i + 1) > tl Then Exit For
b = AscX(tp(i + 1))
If (i + 2) > tl Then
pp = pp & Chr(a * 2 ^ 8 Or b) & Mid(tp(i + 1), 3)
i = i + 1
Else
c = AscX(tp(i + 2))
If UTF8Byte(a) = 3 And UTF8Test(a, b, c) = True Then
h = (a And &HF) * 2 ^ 12 Or (b And &H3F) * 2 ^ 6 Or (c And &H3F)
If h < 0 Then h = h + 65536
pp = pp & ChrW(h) & Mid(tp(i + 2), 3)
i = i + 2
Else
pp = pp & Chr(a * 2 ^ 8 Or b) & Mid(tp(i + 1), 3)
i = i + 1
End If
End If
ElseIf isascii(a) = False Then
pp = pp & tp(i)
Else
pp = pp & Chr(a) & Mid(tp(i), 3)
End If
End If
Next
URLDecodeUTF8 = pp
End Function
leeichang 2007-03-03
Logged In: YES
user_id=2032487
Originator: NO
This code does work properly for the URL, but it causes failure when the macro tries to save images (which use the article name). We need to somehow detect invalid characters in file names. We should probably be doing this anyway though, so I'll look into it some more.
Logged In: YES
user_id=2032487
Originator: NO
Okay, I added support for this to the Word2007 branch with r33. The macro should be able to open pages with unicode characters.