From b29c2b2de4a40ead378ee597164d824d4ae8b3a1 Mon Sep 17 00:00:00 2001 From: Benoit Marty Date: Wed, 3 Jun 2020 18:44:21 +0200 Subject: [PATCH] Send plain text in the body According to https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes, the plain text version of the HTML should be provided in the body. Also create MarkdownParser class to be able to unit test it. --- CHANGES.md | 2 +- .../session/room/send/MarkdownParserTest.kt | 278 ++++++++++++++++++ .../android/internal/extensions/Strings.kt | 24 -- .../internal/session/room/RoomModule.kt | 25 ++ .../room/send/LocalEchoEventFactory.kt | 30 +- .../session/room/send/MarkdownParser.kt | 76 +++++ 6 files changed, 383 insertions(+), 52 deletions(-) create mode 100644 matrix-sdk-android/src/androidTest/java/im/vector/matrix/android/internal/session/room/send/MarkdownParserTest.kt delete mode 100644 matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/extensions/Strings.kt create mode 100644 matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/MarkdownParser.kt diff --git a/CHANGES.md b/CHANGES.md index 7588904c95..cd6ffe9995 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -23,7 +23,7 @@ Build đŸ§±: - Other changes: - - + - Send plain text in the body of events containing formatted body, as per https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes Changes in RiotX 0.21.0 (2020-05-28) =================================================== diff --git a/matrix-sdk-android/src/androidTest/java/im/vector/matrix/android/internal/session/room/send/MarkdownParserTest.kt b/matrix-sdk-android/src/androidTest/java/im/vector/matrix/android/internal/session/room/send/MarkdownParserTest.kt new file mode 100644 index 0000000000..0dd2261e24 --- /dev/null +++ b/matrix-sdk-android/src/androidTest/java/im/vector/matrix/android/internal/session/room/send/MarkdownParserTest.kt @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2020 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package im.vector.matrix.android.internal.session.room.send + +import androidx.test.ext.junit.runners.AndroidJUnit4 +import im.vector.matrix.android.InstrumentedTest +import org.commonmark.parser.Parser +import org.commonmark.renderer.html.HtmlRenderer +import org.commonmark.renderer.text.TextContentRenderer +import org.junit.Assert.assertEquals +import org.junit.FixMethodOrder +import org.junit.Test +import org.junit.runner.RunWith +import org.junit.runners.MethodSorters + +/** + * It will not be possible to test all combinations. For the moment I add a few tests, then, depending on the problem discovered in the wild, + * we can add more tests to cover the edge cases. + * Some tests are suffixed with `_not_passing`, maybe one day we will fix them... + * Riot-Web should be used as a reference for expected results, but not always. Especially Riot-Web add lots of `\n` in the + * formatted body, which is quite useless. + * Also Riot-Web does not provide plain text body when formatted text is provided. The body contains what the user has entered. + * See https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes + */ +@Suppress("SpellCheckingInspection") +@RunWith(AndroidJUnit4::class) +@FixMethodOrder(MethodSorters.JVM) +class MarkdownParserTest : InstrumentedTest { + + /** + * Create the same parser than in the RoomModule + */ + private val markdownParser = MarkdownParser( + Parser.builder().build(), + HtmlRenderer.builder().build(), + TextContentRenderer.builder().build() + ) + + @Test + fun parseNoMarkdown() { + testIdentity("") + testIdentity("a") + testIdentity("1") + testIdentity("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et " + + "dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea com" + + "modo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pari" + + "atur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.") + } + + @Test + fun parseSpaces() { + testIdentity(" ") + testIdentity(" ") + testIdentity("\n") + } + + @Test + fun parseNewLines() { + testIdentity("line1\nline2") + testIdentity("line1\nline2\nline3") + } + + @Test + fun parseBold() { + testType( + name = "bold", + markdownPattern = "**", + htmlExpectedTag = "strong" + ) + } + + @Test + fun parseItalic() { + testType( + name = "italic", + markdownPattern = "*", + htmlExpectedTag = "em" + ) + } + + @Test + fun parseItalic2() { + // Riot-Web format + "_italic_".let { markdownParser.parse(it) }.expect("italic", "italic") + } + + /** + * Note: the test is not passing, it does not work on Riot-Web neither + */ + @Test + fun parseStrike_not_passing() { + testType( + name = "strike", + markdownPattern = "~~", + htmlExpectedTag = "del" + ) + } + + @Test + fun parseCode() { + testType( + name = "code", + markdownPattern = "`", + htmlExpectedTag = "code", + plainTextPrefix = "\"", + plainTextSuffix = "\"" + ) + } + + @Test + fun parseCode2() { + testType( + name = "code", + markdownPattern = "``", + htmlExpectedTag = "code", + plainTextPrefix = "\"", + plainTextSuffix = "\"" + ) + } + + @Test + fun parseCode3() { + testType( + name = "code", + markdownPattern = "```", + htmlExpectedTag = "code", + plainTextPrefix = "\"", + plainTextSuffix = "\"" + ) + } + + @Test + fun parseUnorderedList() { + "- item1".let { markdownParser.parse(it).expect(it, "") } + "- item1\n- item2".let { markdownParser.parse(it).expect(it, "") } + } + + @Test + fun parseOrderedList() { + "1. item1".let { markdownParser.parse(it).expect(it, "
  1. item1
") } + "1. item1\n2. item2".let { markdownParser.parse(it).expect(it, "
  1. item1
  2. item2
") } + } + + @Test + fun parseHorizontalLine() { + "---".let { markdownParser.parse(it) }.expect("***", "
") + } + + @Test + fun parseH2AndContent() { + "a\n---\nb".let { markdownParser.parse(it) }.expect("a\nb", "

a

b

") + } + + @Test + fun parseQuote() { + "> quoted".let { markdownParser.parse(it) }.expect("«quoted»", "

quoted

") + } + + @Test + fun parseQuote_not_passing() { + "> quoted\nline2".let { markdownParser.parse(it) }.expect("«quoted\nline2»", "

quoted
line2

") + } + + @Test + fun parseBoldItalic() { + "*italic* **bold**".let { markdownParser.parse(it) }.expect("italic bold", "italic bold") + "**bold** *italic*".let { markdownParser.parse(it) }.expect("bold italic", "bold italic") + } + + @Test + fun parseHead() { + "# head1".let { markdownParser.parse(it) }.expect("head1", "

head1

") + "## head2".let { markdownParser.parse(it) }.expect("head2", "

head2

") + "### head3".let { markdownParser.parse(it) }.expect("head3", "

head3

") + "#### head4".let { markdownParser.parse(it) }.expect("head4", "

head4

") + "##### head5".let { markdownParser.parse(it) }.expect("head5", "
head5
") + "###### head6".let { markdownParser.parse(it) }.expect("head6", "
head6
") + } + + @Test + fun parseHeads() { + "# head1\n# head2".let { markdownParser.parse(it) }.expect("head1\nhead2", "

head1

head2

") + } + + @Test + fun parseBoldNewLines_not_passing() { + "**bold**\nline2".let { markdownParser.parse(it) }.expect("bold\nline2", "bold
line2") + } + + @Test + fun parseLinks() { + "[link](target)".let { markdownParser.parse(it) }.expect(""""link" (target)""", """link""") + } + + @Test + fun parseParagraph() { + "# head\ncontent".let { markdownParser.parse(it) }.expect("head\ncontent", "

head

content

") + } + + private fun testIdentity(text: String) { + markdownParser.parse(text).expect(text, null) + } + + private fun testType(name: String, + markdownPattern: String, + htmlExpectedTag: String, + plainTextPrefix: String = "", + plainTextSuffix: String = "") { + // Test simple case + "$markdownPattern$name$markdownPattern" + .let { markdownParser.parse(it) } + .expect(expectedText = "$plainTextPrefix$name$plainTextSuffix", + expectedFormattedText = "<$htmlExpectedTag>$name") + + // Test twice the same tag + "$markdownPattern$name$markdownPattern and $markdownPattern$name bis$markdownPattern" + .let { markdownParser.parse(it) } + .expect(expectedText = "$plainTextPrefix$name$plainTextSuffix and $plainTextPrefix$name bis$plainTextSuffix", + expectedFormattedText = "<$htmlExpectedTag>$name and <$htmlExpectedTag>$name bis") + + val textBefore = "a" + val textAfter = "b" + + // With sticked text before + "$textBefore$markdownPattern$name$markdownPattern" + .let { markdownParser.parse(it) } + .expect(expectedText = "$textBefore$plainTextPrefix$name$plainTextSuffix", + expectedFormattedText = "$textBefore<$htmlExpectedTag>$name") + + // With text before and space + "$textBefore $markdownPattern$name$markdownPattern" + .let { markdownParser.parse(it) } + .expect(expectedText = "$textBefore $plainTextPrefix$name$plainTextSuffix", + expectedFormattedText = "$textBefore <$htmlExpectedTag>$name") + + // With sticked text after + "$markdownPattern$name$markdownPattern$textAfter" + .let { markdownParser.parse(it) } + .expect(expectedText = "$plainTextPrefix$name$plainTextSuffix$textAfter", + expectedFormattedText = "<$htmlExpectedTag>$name$textAfter") + + // With space and text after + "$markdownPattern$name$markdownPattern $textAfter" + .let { markdownParser.parse(it) } + .expect(expectedText = "$plainTextPrefix$name$plainTextSuffix $textAfter", + expectedFormattedText = "<$htmlExpectedTag>$name $textAfter") + + // With sticked text before and text after + "$textBefore$markdownPattern$name$markdownPattern$textAfter" + .let { markdownParser.parse(it) } + .expect(expectedText = "$textBefore$plainTextPrefix$name$plainTextSuffix$textAfter", + expectedFormattedText = "a<$htmlExpectedTag>$name$textAfter") + + // With text before and after, with spaces + "$textBefore $markdownPattern$name$markdownPattern $textAfter" + .let { markdownParser.parse(it) } + .expect(expectedText = "$textBefore $plainTextPrefix$name$plainTextSuffix $textAfter", + expectedFormattedText = "$textBefore <$htmlExpectedTag>$name $textAfter") + } + + private fun TextContent.expect(expectedText: String, expectedFormattedText: String?) { + assertEquals("TextContent are not identical", TextContent(expectedText, expectedFormattedText), this) + } +} diff --git a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/extensions/Strings.kt b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/extensions/Strings.kt deleted file mode 100644 index 09913b9f04..0000000000 --- a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/extensions/Strings.kt +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2020 New Vector Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package im.vector.matrix.android.internal.extensions - -/** - * Ex: "abcdef".subStringBetween("a", "f") -> "bcde" - * Ex: "abcdefff".subStringBetween("a", "f") -> "bcdeff" - * Ex: "aaabcdef".subStringBetween("a", "f") -> "aabcde" - */ -internal fun String.subStringBetween(prefix: String, suffix: String) = substringAfter(prefix).substringBeforeLast(suffix) diff --git a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/RoomModule.kt b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/RoomModule.kt index 001ce120c8..77a89a355f 100644 --- a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/RoomModule.kt +++ b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/RoomModule.kt @@ -66,6 +66,9 @@ import im.vector.matrix.android.internal.session.room.typing.DefaultSendTypingTa import im.vector.matrix.android.internal.session.room.typing.SendTypingTask import im.vector.matrix.android.internal.session.room.uploads.DefaultGetUploadsTask import im.vector.matrix.android.internal.session.room.uploads.GetUploadsTask +import org.commonmark.parser.Parser +import org.commonmark.renderer.html.HtmlRenderer +import org.commonmark.renderer.text.TextContentRenderer import retrofit2.Retrofit @Module @@ -79,6 +82,28 @@ internal abstract class RoomModule { fun providesRoomAPI(retrofit: Retrofit): RoomAPI { return retrofit.create(RoomAPI::class.java) } + + @Provides + @JvmStatic + fun providesParser(): Parser { + return Parser.builder().build() + } + + @Provides + @JvmStatic + fun providesHtmlRenderer(): HtmlRenderer { + return HtmlRenderer + .builder() + .build() + } + + @Provides + @JvmStatic + fun providesTextContentRenderer(): TextContentRenderer { + return TextContentRenderer + .builder() + .build() + } } @Binds diff --git a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/LocalEchoEventFactory.kt b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/LocalEchoEventFactory.kt index 9f95696fba..08dc4e80d8 100644 --- a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/LocalEchoEventFactory.kt +++ b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/LocalEchoEventFactory.kt @@ -58,14 +58,11 @@ import im.vector.matrix.android.api.session.room.model.relation.ReplyToContent import im.vector.matrix.android.api.session.room.timeline.TimelineEvent import im.vector.matrix.android.api.session.room.timeline.getLastMessageContent import im.vector.matrix.android.internal.di.UserId -import im.vector.matrix.android.internal.extensions.subStringBetween import im.vector.matrix.android.internal.session.content.ThumbnailExtractor import im.vector.matrix.android.internal.session.room.send.pills.TextPillsUtils import im.vector.matrix.android.internal.task.TaskExecutor import im.vector.matrix.android.internal.util.StringProvider import kotlinx.coroutines.launch -import org.commonmark.parser.Parser -import org.commonmark.renderer.html.HtmlRenderer import javax.inject.Inject /** @@ -81,16 +78,11 @@ internal class LocalEchoEventFactory @Inject constructor( private val context: Context, @UserId private val userId: String, private val stringProvider: StringProvider, + private val markdownParser: MarkdownParser, private val textPillsUtils: TextPillsUtils, private val taskExecutor: TaskExecutor, private val localEchoRepository: LocalEchoRepository ) { - // TODO Inject - private val parser = Parser.builder().build() - - // TODO Inject - private val renderer = HtmlRenderer.builder().build() - fun createTextEvent(roomId: String, msgType: String, text: CharSequence, autoMarkdown: Boolean): Event { if (msgType == MessageType.MSGTYPE_TEXT || msgType == MessageType.MSGTYPE_EMOTE) { return createFormattedTextEvent(roomId, createTextContent(text, autoMarkdown), msgType) @@ -101,21 +93,8 @@ internal class LocalEchoEventFactory @Inject constructor( private fun createTextContent(text: CharSequence, autoMarkdown: Boolean): TextContent { if (autoMarkdown) { - val source = textPillsUtils.processSpecialSpansToMarkdown(text) - ?: text.toString() - val document = parser.parse(source) - val htmlText = renderer.render(document) - - // Cleanup extra paragraph - val cleanHtmlText = if (htmlText.startsWith("

") && htmlText.endsWith("

\n")) { - htmlText.subStringBetween("

", "

\n") - } else { - htmlText - } - - if (isFormattedTextPertinent(source, cleanHtmlText)) { - return TextContent(text.toString(), cleanHtmlText) - } + val source = textPillsUtils.processSpecialSpansToMarkdown(text) ?: text.toString() + return markdownParser.parse(source) } else { // Try to detect pills textPillsUtils.processSpecialSpansToHtml(text)?.let { @@ -126,9 +105,6 @@ internal class LocalEchoEventFactory @Inject constructor( return TextContent(text.toString()) } - private fun isFormattedTextPertinent(text: String, htmlText: String?) = - text != htmlText && htmlText != "

${text.trim()}

\n" - fun createFormattedTextEvent(roomId: String, textContent: TextContent, msgType: String): Event { return createMessageEvent(roomId, textContent.toMessageTextContent(msgType)) } diff --git a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/MarkdownParser.kt b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/MarkdownParser.kt new file mode 100644 index 0000000000..92de583de7 --- /dev/null +++ b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/MarkdownParser.kt @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package im.vector.matrix.android.internal.session.room.send + +import org.commonmark.parser.Parser +import org.commonmark.renderer.html.HtmlRenderer +import org.commonmark.renderer.text.TextContentRenderer +import javax.inject.Inject + +/** + * This class convert a text to an html text + * This class is tested by [MarkdownParserTest]. + * If any change is required, please add a test covering the problem and make sure all the tests are still passing. + */ +internal class MarkdownParser @Inject constructor( + private val parser: Parser, + private val htmlRenderer: HtmlRenderer, + private val textContentRenderer: TextContentRenderer +) { + + private val mdSpecialChars = "[`_\\-\\*>\\.\\[\\]#~]".toRegex() + + fun parse(text: String): TextContent { + // If no special char are detected, just return plain text + if (text.contains(mdSpecialChars).not()) { + return TextContent(text.toString()) + } + + val document = parser.parse(text) + val htmlText = htmlRenderer.render(document) + + // Cleanup extra paragraph + val cleanHtmlText = if (htmlText.lastIndexOf("

") == 0) { + htmlText.removeSurrounding("

", "

\n") + } else { + htmlText + } + + return if (isFormattedTextPertinent(text, cleanHtmlText)) { + // According to https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes: + // The plain text version of the HTML should be provided in the body. + val plainText = textContentRenderer.render(document) + TextContent(plainText, cleanHtmlText.postTreatment()) + } else { + TextContent(text.toString()) + } + } + + private fun isFormattedTextPertinent(text: String, htmlText: String?) = + text != htmlText && htmlText != "

${text.trim()}

\n" + + /** + * The parser makes some mistakes, so deal with it here + */ + private fun String.postTreatment(): String { + return this + // Remove extra space before and after the content + .trim() + // There is no need to include new line in an html-like source + .replace("\n", "") + } +}