initial commit
This commit is contained in:
137
parse_code_blocks.odin
Normal file
137
parse_code_blocks.odin
Normal file
@@ -0,0 +1,137 @@
|
||||
package comb
|
||||
|
||||
import "core:strings"
|
||||
import tt "core:testing"
|
||||
|
||||
// Code_Block describes one fenced code block found by `parse_code_blocks`.
Code_Block :: struct {
	// Zero-based line indices of the opening and closing fence lines.
	line_start, line_end: int,
	// Byte offsets into the parsed string delimiting `text`
	// (`text == s[char_start:char_end]`).
	char_start, char_end: int,
	// Language name taken from the opening fence (the word right after the backticks).
	language:             string,
	// Key/value pairs parsed from the optional `{key=value,...}` section of the opening fence.
	tags:                 map[string]string,
	// Content between the fences; a slice of the parsed string, not a copy.
	text:                 string,
}
|
||||
|
||||
// destroy_code_block frees the memory owned by a code block.
// Only the `tags` map is released; `language`, `text` and the tag strings are
// slices of the string that was parsed and are not owned by the block.
destroy_code_block :: proc(cb: Code_Block) {
	owned_tags := cb.tags
	delete(owned_tags)
}
|
||||
|
||||
// ParseError enumerates the ways `parse_code_blocks` can fail.
ParseError :: enum {
	// Parsing succeeded.
	None                  = 0,
	// The input ended while a code block was still open.
	Incomplete_Code_Block,
	// The tag section after the language name is not a well-formed `{...}` group.
	Invalid_Tag,
}
|
||||
|
||||
// parse_code_blocks scans `s` line by line and collects every fenced code block
// whose opening fence names a language, e.g.
//
//     ```python {Foo=bar,Bazz=quq}
//     print("Hello world")
//     ```
//
// A fence is any line that starts with three backticks once surrounding
// whitespace is trimmed. Fences without a language are skipped. Text after the
// language name must be a `{key=value,...}` group; its pairs are stored in the
// block's `tags` (pairs without `=` are ignored).
//
// Inputs:
// - s: the text to scan. The returned blocks hold slices of `s`, so `s` must
//   outlive them.
//
// Returns:
// - blocks: the blocks completed before parsing stopped. The caller owns the
//   array and must call `destroy_code_block` on each element, also when `err`
//   is not `.None`.
// - err: `.Invalid_Tag` when the tag section is malformed,
//   `.Incomplete_Code_Block` when the input ends inside a block, otherwise `.None`.
parse_code_blocks :: proc(s: string) -> (blocks: [dynamic]Code_Block, err: ParseError) {
	it := s
	in_code_block := false
	current_block := Code_Block{}
	line_index := 0
	// Byte offset in `s` of the first byte of the current line
	char_index := 0
	for line in strings.split_lines_iterator(&it) {
		// The iterator has already consumed this line together with its
		// terminator, so the consumed byte count is the exact offset of the next
		// line. Unlike `len(line) + 1` this stays correct for "\r\n" endings.
		next_line_start := len(s) - len(it)
		defer line_index += 1
		defer char_index = next_line_start

		trimmed_line := strings.trim_space(line)
		if !strings.starts_with(trimmed_line, "```") do continue
		in_code_block = !in_code_block

		// We just finished parsing a code block
		if !in_code_block {
			// Exclude the "\n" (or "\r\n") that precedes the closing fence
			text_end := char_index - 1
			if text_end > current_block.char_start && s[text_end - 1] == '\r' {
				text_end -= 1
			}
			// An empty block has no content line, which would put the end before
			// the start; clamp so the slice below stays valid and yields ""
			if text_end < current_block.char_start {
				text_end = current_block.char_start
			}
			current_block.line_end = line_index
			current_block.char_end = text_end
			current_block.text = s[current_block.char_start:text_end]
			append(&blocks, current_block)
			current_block = {}
			continue
		}

		// It looks like we're starting a code block, but there's no language specified
		// That means we shouldn't try to extract it
		if len(trimmed_line) == 3 {
			in_code_block = false
			continue
		}

		current_block.line_start = line_index
		// The block's text begins on the line after the opening fence
		current_block.char_start = next_line_start

		remaining := strings.trim_space(trimmed_line[3:])
		first_space := strings.index_rune(remaining, ' ')
		// There are no keys after the language name
		if first_space == -1 {
			current_block.language = remaining
			continue
		}

		current_block.language = remaining[:first_space]
		// The +1 is safe because we know there are characters after the space (or else it would have been trimmed)
		tag_str := remaining[first_space + 1:]
		opening_curly := strings.index_rune(tag_str, '{')
		closing_curly := strings.last_index_byte(tag_str, '}')

		// Validate before anything else so that a stray `}` without a matching
		// `{` is reported instead of being mistaken for an empty `{}` group
		// TODO: allow curlies if they're in strings
		if opening_curly == -1 || closing_curly == -1 || closing_curly < opening_curly {
			// TODO: improve error reporting
			return blocks, .Invalid_Tag
		}

		// Covers both `{}` and `{   }`
		tag_content_str := strings.trim_space(tag_str[opening_curly + 1:closing_curly])
		if tag_content_str == "" do continue

		tag_content_str_it := tag_content_str
		for pair in strings.split_iterator(&tag_content_str_it, ",") {
			kv := strings.trim_space(pair)
			// Split on the first `=`; everything after it belongs to the value
			eq := strings.index_rune(kv, '=')
			if eq == -1 do continue
			current_block.tags[kv[:eq]] = kv[eq + 1:]
		}
	}

	// TODO: improve error reporting
	if in_code_block {
		// The unfinished block is never handed to the caller, so release its tags here
		delete(current_block.tags)
		return blocks, .Incomplete_Code_Block
	}

	return blocks, .None
}
|
||||
|
||||
|
||||
// Parses `example_1` and checks the language, text and tags of the three code
// blocks it is expected to contain.
@(test)
parse_blocks_correctly :: proc(t: ^tt.T) {
	blocks, err := parse_code_blocks(example_1)
	// Deferred statements run in reverse order: the per-block tags are freed
	// first, then the array itself
	defer delete(blocks)
	defer for cb in blocks do destroy_code_block(cb)

	tt.expect(t, err == .None, "parse_blocks_correctly should not error")
	// Stop here on a wrong count so the indexing below cannot go out of bounds
	// and hide the real failure behind a panic
	if !tt.expect_value(t, len(blocks), 3) do return

	{
		b := blocks[0]
		tt.expect_value(t, b.language, "python")
		tt.expect_value(t, b.text, `print("Hello world")`)
	}

	{
		b := blocks[1]
		tt.expect_value(t, len(b.tags), 1)
		val :=
			b.tags["Foo"] or_else tt.fail_now(t, "the second code block should have a `Foo` key")
		tt.expect_value(t, val, "bar")
	}

	{
		b := blocks[2]
		tt.expect_value(t, len(b.tags), 2)
		val :=
			b.tags["Foo"] or_else tt.fail_now(t, "the third code block should have a `Foo` key")
		tt.expect_value(t, val, "bar")
		val =
			b.tags["Bazz"] or_else tt.fail_now(t, "the third code block should have a `Bazz` key")
		tt.expect_value(t, val, "quq")
	}
}
|
||||
Reference in New Issue
Block a user