From 3c28de4155e0899b03cddd2921d0b21acaadb8d6 Mon Sep 17 00:00:00 2001 From: Tom Price Date: Wed, 23 Dec 2020 01:30:08 +0000 Subject: [PATCH] Unescape HTML in a skype message to prevent element issues. This remains secure as Matrix clients SHOULD only render specific HTML tags, so there is no expectation that server-side messages are secure by default. Add basic tests for Formatter.ParseSkype for the new escaping bug. This could be expanded in the future to include all the other functions. Closes #1 --- formatting.go | 2 + formatting_test.go | 96 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 formatting_test.go diff --git a/formatting.go b/formatting.go index 22e0ff6..f87d147 100644 --- a/formatting.go +++ b/formatting.go @@ -18,6 +18,7 @@ var italicRegex = regexp.MustCompile("([\\s>~*]|^)_(.+?)_([^a-zA-Z\\d]|$)") var boldRegex = regexp.MustCompile("([\\s>_~]|^)\\*(.+?)\\*([^a-zA-Z\\d]|$)") var strikethroughRegex = regexp.MustCompile("([\\s>_*]|^)~(.+?)~([^a-zA-Z\\d]|$)") var codeBlockRegex = regexp.MustCompile("```(?:.|\n)+?```") + //var mentionRegex = regexp.MustCompile("@[0-9]+") //var mentionRegex = regexp.MustCompile("@(.*)") var mentionRegex = regexp.MustCompile("<at[^>]+\\bid=\"([^\"]+)\"(.*?)</at>*") @@ -125,6 +126,7 @@ func (formatter *Formatter) ParseSkype(content *event.MessageEventContent) { } if output != content.Body { output = strings.Replace(output, "\n", "
", -1) + content.Body = html.UnescapeString(content.Body) // skype messages arrive escaped which causes element rendering issues #1 // parse @user message r := regexp.MustCompile(`<at[^>]+\bid="([^"]+)"(.*?)</at>*`) diff --git a/formatting_test.go b/formatting_test.go new file mode 100644 index 0000000..6070483 --- /dev/null +++ b/formatting_test.go @@ -0,0 +1,96 @@ +package main + +import ( + "github.com/kelaresg/matrix-skype/database" + "github.com/kelaresg/matrix-skype/types" + "maunium.net/go/mautrix/event" + "maunium.net/go/mautrix/format" + "reflect" + "regexp" + "sync" + "testing" +) + +func TestFormatter_ParseSkype(t *testing.T) { + type fields struct { + bridge *Bridge + matrixHTMLParser *format.HTMLParser + waReplString map[*regexp.Regexp]string + waReplFunc map[*regexp.Regexp]func(string) string + waReplFuncText map[*regexp.Regexp]func(string) string + } + type args struct { + content *event.MessageEventContent + } + type expect struct { + content *event.MessageEventContent + } + testUser := &User{ + User: &database.User{ + MXID: "mxtestid", + }, + } + testBridge := &Bridge{ + usersLock: *new(sync.Mutex), + usersByJID: map[types.SkypeID]*User{"test": testUser}, + } + testFormatter := &Formatter{ + bridge: testBridge, + } + tests := []struct { + name string + args args + expect expect + }{ + { + "simple message", + args{ + &event.MessageEventContent{ + Body: "This is a very simple message.", + }, + }, + expect{ + &event.MessageEventContent{ + Body: "This is a very simple message.", + }, + }, + }, + { + "simple punctuation test", + args{ + &event.MessageEventContent{ + Body: "It&#39;s the inclusion of &quot;simple&quot; punctuation that causes most of the problems.", + }, + }, + expect{ + &event.MessageEventContent{ + Body: "It's the inclusion of \"simple\" punctuation that causes most of the problems.", + Format: event.FormatHTML, + }, + }, + }, + { + "full punctuation test", + args{ + &event.MessageEventContent{ + Body: "&amp;&lt;&gt;&quot;&#39;", // use a few different encodings + Format: 
event.FormatHTML, + }, + }, + expect{ + &event.MessageEventContent{ + Body: "&<>\"'", + Format: event.FormatHTML, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + testFormatter.ParseSkype(tt.args.content) + if !reflect.DeepEqual(tt.args.content, tt.expect.content) { + t.Errorf("content = %v, wanted %v", tt.args.content, tt.expect.content) + } + }) + } +}