Files
comb/parse_code_blocks.odin
2025-12-18 11:37:25 -05:00

138 lines
3.7 KiB
Odin

package comb
import "core:strings"
import tt "core:testing"
// Code_Block describes a single fenced code block located by `parse_code_blocks`.
//
// `language` and `text` are slices of the string that was parsed (nothing is
// copied), so they are only valid while that string is alive. `tags` is the
// only field that is allocated; release it with `destroy_code_block`.
Code_Block :: struct {
	// Zero-based line indices of the opening fence line and the closing fence line.
	line_start, line_end: int,
	// Byte offsets into the parsed string delimiting the body of the block;
	// `text` is the slice `[char_start:char_end]` of that string.
	char_start, char_end: int,
	// First word following the opening fence, e.g. `python`.
	language:             string,
	// `key=value` pairs taken from the `{...}` section after the language.
	tags:                 map[string]string,
	// Body of the block, i.e. the lines between the two fences.
	text:                 string,
}
// destroy_code_block frees the memory owned by `cb`.
//
// Only the `tags` map is released here; the other fields are not freed by
// this procedure.
destroy_code_block :: proc(cb: Code_Block) {
	owned_tags := cb.tags
	delete(owned_tags)
}
// ParseError lists the ways `parse_code_blocks` can fail.
ParseError :: enum {
	// Zero value: parsing finished without a problem.
	None,
	// The input ended while a code block was still open.
	Incomplete_Code_Block,
	// The text after the language name is not a well-formed `{...}` tag section.
	Invalid_Tag,
}
// parse_code_blocks scans `s` line by line and collects every fenced code
// block that names a language.
//
// A block opens on a line whose trimmed text starts with ``` followed by a
// language name, and closes on the next line whose trimmed text starts with
// ```. A fence with nothing after the backticks never opens a block, so
// language-less blocks are not extracted. The language may be followed by a
// space and a tag section such as `{Foo=bar, Bazz=quq}`, which is stored in
// `Code_Block.tags`. Tag entries without a `=` are ignored, and keys/values
// are stored exactly as written around the `=`.
//
// Inputs:
// - s: the text to scan. `language`, `text` and the tag strings of the
//   returned blocks are slices of `s`, so `s` must outlive them.
//
// Returns:
// - blocks: the blocks completed so far (also populated when `err` is set).
//   The caller owns the array and each block's `tags` map.
// - err: `.Invalid_Tag` when text after the language is not wrapped in
//   `{`...`}`, `.Incomplete_Code_Block` when the input ends inside a block,
//   `.None` otherwise.
parse_code_blocks :: proc(s: string) -> (blocks: [dynamic]Code_Block, err: ParseError) {
	it := s
	in_code_block := false
	current_block := Code_Block{}
	line_index := 0
	// Byte offset just past the content of the previous line, excluding its
	// line terminator. This is where the body of a block ends when the current
	// line turns out to be the closing fence.
	prev_content_end := 0

	for {
		// `it` is always the not-yet-consumed suffix of `s`, so real byte offsets
		// can be derived from its length. Summing `len(line) + 1` would drift on
		// CRLF input because the iterator strips the trailing `\r`.
		line_start := len(s) - len(it)
		line, has_line := strings.split_lines_iterator(&it)
		if !has_line do break
		next_line_start := len(s) - len(it)

		defer line_index += 1
		defer prev_content_end = line_start + len(line)

		trimmed_line := strings.trim_space(line)
		if !strings.starts_with(trimmed_line, "```") do continue

		// Closing fence: finish the block that is currently open.
		if in_code_block {
			in_code_block = false
			current_block.line_end = line_index
			// An empty block has its closing fence directly after the opening one;
			// clamp so the slice below is empty instead of having end < start.
			current_block.char_end = max(prev_content_end, current_block.char_start)
			current_block.text = s[current_block.char_start:current_block.char_end]
			append(&blocks, current_block)
			current_block = {}
			continue
		}

		// A bare fence has no language, so the block is not extracted.
		if len(trimmed_line) == 3 do continue

		// Opening fence: the body starts on the following line.
		in_code_block = true
		current_block.line_start = line_index
		current_block.char_start = next_line_start

		info := strings.trim_space(trimmed_line[3:])
		first_space := strings.index_byte(info, ' ')
		// There are no tags after the language name.
		if first_space == -1 {
			current_block.language = info
			continue
		}
		current_block.language = info[:first_space]

		// The +1 is safe: `info` is trimmed, so characters follow the space.
		tag_str := info[first_space + 1:]
		opening_curly := strings.index_byte(tag_str, '{')
		closing_curly := strings.last_index_byte(tag_str, '}')
		// Validate before looking at the content, otherwise a stray leading `}`
		// without any `{` would pass for an empty tag section.
		// TODO: allow curlies if they're in strings
		if opening_curly == -1 || closing_curly == -1 || closing_curly < opening_curly {
			// TODO: improve error reporting
			return blocks, .Invalid_Tag
		}

		tag_content := strings.trim_space(tag_str[opening_curly + 1:closing_curly])
		if tag_content == "" do continue

		pairs_it := tag_content
		for raw_pair in strings.split_iterator(&pairs_it, ",") {
			pair := strings.trim_space(raw_pair)
			// Split on the first `=`; entries without one are skipped.
			eq := strings.index_byte(pair, '=')
			if eq == -1 do continue
			current_block.tags[pair[:eq]] = pair[eq + 1:]
		}
	}

	if in_code_block {
		// The unfinished block is never handed to the caller, so its tag map has
		// to be released here.
		delete(current_block.tags)
		// TODO: improve error reporting
		return blocks, .Incomplete_Code_Block
	}
	return blocks, .None
}
// Parses `example_1` and checks the three code blocks it is expected to yield:
// the language and text of the first one, and the tags of the second and third.
@(test)
parse_blocks_correctly :: proc(t: ^tt.T) {
	blocks, err := parse_code_blocks(example_1)
	// Deferred statements run in reverse order: the tag maps are released first,
	// then the array that holds the blocks.
	defer delete(blocks)
	defer for cb in blocks do destroy_code_block(cb)

	tt.expect(t, err == .None, "parse_blocks_correctly should not error")
	// Stop here on a wrong count: indexing `blocks` below would otherwise panic
	// with an out-of-bounds access instead of reporting a test failure.
	if !tt.expect_value(t, len(blocks), 3) do return

	{
		b := blocks[0]
		tt.expect_value(t, b.language, "python")
		tt.expect_value(t, b.text, `print("Hello world")`)
	}
	{
		b := blocks[1]
		tt.expect_value(t, len(b.tags), 1)
		val :=
			b.tags["Foo"] or_else tt.fail_now(t, "the second code block should have a `Foo` key")
		tt.expect_value(t, val, "bar")
	}
	{
		b := blocks[2]
		tt.expect_value(t, len(b.tags), 2)
		val :=
			b.tags["Foo"] or_else tt.fail_now(t, "the third code block should have a `Foo` key")
		tt.expect_value(t, val, "bar")
		val =
			b.tags["Bazz"] or_else tt.fail_now(t, "the third code block should have a `Bazz` key")
		tt.expect_value(t, val, "quq")
	}
}