Merge pull request #4 from tomtom5152/html-escape

Unescape HTML in a Skype message to prevent Element rendering issues.
This commit is contained in:
pidong 2020-12-24 12:06:52 +08:00 committed by GitHub
commit 4eebaf9231
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 98 additions and 0 deletions

View File

@ -18,6 +18,7 @@ var italicRegex = regexp.MustCompile("([\\s>~*]|^)_(.+?)_([^a-zA-Z\\d]|$)")
var boldRegex = regexp.MustCompile("([\\s>_~]|^)\\*(.+?)\\*([^a-zA-Z\\d]|$)") var boldRegex = regexp.MustCompile("([\\s>_~]|^)\\*(.+?)\\*([^a-zA-Z\\d]|$)")
var strikethroughRegex = regexp.MustCompile("([\\s>_*]|^)~(.+?)~([^a-zA-Z\\d]|$)") var strikethroughRegex = regexp.MustCompile("([\\s>_*]|^)~(.+?)~([^a-zA-Z\\d]|$)")
var codeBlockRegex = regexp.MustCompile("```(?:.|\n)+?```") var codeBlockRegex = regexp.MustCompile("```(?:.|\n)+?```")
//var mentionRegex = regexp.MustCompile("@[0-9]+") //var mentionRegex = regexp.MustCompile("@[0-9]+")
//var mentionRegex = regexp.MustCompile("@(.*)") //var mentionRegex = regexp.MustCompile("@(.*)")
var mentionRegex = regexp.MustCompile("<at[^>]+\\bid=\"([^\"]+)\"(.*?)</at>*") var mentionRegex = regexp.MustCompile("<at[^>]+\\bid=\"([^\"]+)\"(.*?)</at>*")
@ -125,6 +126,7 @@ func (formatter *Formatter) ParseSkype(content *event.MessageEventContent) {
} }
if output != content.Body { if output != content.Body {
output = strings.Replace(output, "\n", "<br/>", -1) output = strings.Replace(output, "\n", "<br/>", -1)
content.Body = html.UnescapeString(content.Body) // skype messages arrive escaped which causes element rendering issues #1
// parse @user message // parse @user message
r := regexp.MustCompile(`<at[^>]+\bid="([^"]+)"(.*?)</at>*`) r := regexp.MustCompile(`<at[^>]+\bid="([^"]+)"(.*?)</at>*`)

96
formatting_test.go Normal file
View File

@ -0,0 +1,96 @@
package main
import (
"github.com/kelaresg/matrix-skype/database"
"github.com/kelaresg/matrix-skype/types"
"maunium.net/go/mautrix/event"
"maunium.net/go/mautrix/format"
"reflect"
"regexp"
"sync"
"testing"
)
// TestFormatter_ParseSkype feeds a table of message contents through
// Formatter.ParseSkype and verifies, with reflect.DeepEqual, that each content
// value matches the expected one after the call. The test relies on ParseSkype
// rewriting the content it is handed rather than returning a new value.
func TestFormatter_ParseSkype(t *testing.T) {
	// fields is not referenced by any case below. In the visible part of this
	// file it is the only thing that uses the format and regexp imports, so it
	// is kept as-is.
	type fields struct {
		bridge           *Bridge
		matrixHTMLParser *format.HTMLParser
		waReplString     map[*regexp.Regexp]string
		waReplFunc       map[*regexp.Regexp]func(string) string
		waReplFuncText   map[*regexp.Regexp]func(string) string
	}

	// Minimal fixture: a bridge that knows a single Skype user, wrapped in the
	// formatter under test.
	skypeUser := &User{
		User: &database.User{MXID: "mxtestid"},
	}
	formatter := &Formatter{
		bridge: &Bridge{
			usersLock:  sync.Mutex{},
			usersByJID: map[types.SkypeID]*User{"test": skypeUser},
		},
	}

	cases := []struct {
		name string
		in   *event.MessageEventContent // passed to ParseSkype
		want *event.MessageEventContent // expected state of in afterwards
	}{
		{
			name: "simple message",
			in:   &event.MessageEventContent{Body: "This is a very simple message."},
			want: &event.MessageEventContent{Body: "This is a very simple message."},
		},
		{
			// The input carries no Format; the expectation is that it comes
			// back marked as HTML with the entities decoded.
			name: "simple punctuation test",
			in: &event.MessageEventContent{
				Body: "It&apos;s the inclusion of &quot;simple&quot; punctuation that causes most of the problems.",
			},
			want: &event.MessageEventContent{
				Body:   "It's the inclusion of \"simple\" punctuation that causes most of the problems.",
				Format: event.FormatHTML,
			},
		},
		{
			// Mixes several entity encodings in one body.
			name: "full punctuation test",
			in: &event.MessageEventContent{
				Body:   "&amp;&lt;&gt;&quot;&#39",
				Format: event.FormatHTML,
			},
			want: &event.MessageEventContent{
				Body:   "&<>\"'",
				Format: event.FormatHTML,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			formatter.ParseSkype(tc.in)
			if reflect.DeepEqual(tc.in, tc.want) {
				return
			}
			t.Errorf("content = %v, wanted %v", tc.in, tc.want)
		})
	}
}