package comb

import "core:strings"
import tt "core:testing"

// Code_Block describes a single fenced code block extracted by `parse_code_blocks`.
//
// `language`, `text` and every key/value stored in `tags` are slices of the
// string that was parsed, so they are only valid while that string is alive.
Code_Block :: struct {
	// Zero-based index of the line containing the opening fence.
	line_start: int,
	// Zero-based index of the line containing the closing fence.
	line_end:   int,
	// Byte offset (into the parsed string) of the first byte after the opening fence line.
	char_start: int,
	// Byte offset (into the parsed string) one past the last byte of `text`.
	char_end:   int,
	// The first word following the opening fence.
	language:   string,
	// `key=value` pairs taken from the `{...}` section following the language.
	tags:       map[string]string,
	// The content between the fences, without the final line terminator.
	text:       string,
}

// Releases the memory owned by a `Code_Block`. Only the `tags` map is owned;
// the string fields borrow from the parsed input.
destroy_code_block :: proc(cb: Code_Block) {
	delete(cb.tags)
}

// Errors reported by `parse_code_blocks`.
ParseError :: enum {
	None = 0,
	// The input ended while a code block was still open.
	Incomplete_Code_Block,
	// The text following the language name is not a well-formed `{...}` section.
	Invalid_Tag,
}

// Scans `s` line by line and collects every fenced code block that declares a language.
//
// An opening fence has the form
//
//     ```language {key=value, other=value}
//
// where the `{...}` section is optional. Fences without a language are skipped.
//
// Returns the blocks found so far together with an error code. Even when `err`
// is not `.None` the caller owns `blocks` and must release it (and the `tags`
// of each element, see `destroy_code_block`).
parse_code_blocks :: proc(s: string) -> (blocks: [dynamic]Code_Block, err: ParseError) {
	FENCE :: "```"

	it := s
	in_code_block := false
	current_block := Code_Block{}
	line_index := 0
	// Byte offset of the start of the current line within `s`.
	line_offset := 0

	for line in strings.split_lines_iterator(&it) {
		// The iterator has already removed this line and its terminator from
		// `it`, so whatever is missing from `it` is exactly the offset of the
		// next line. Deriving the offset this way (instead of `len(line) + 1`)
		// stays correct when lines end in "\r\n".
		next_line_offset := len(s) - len(it)
		defer line_index += 1
		defer line_offset = next_line_offset

		trimmed_line := strings.trim_space(line)
		strings.starts_with(trimmed_line, FENCE) or_continue
		in_code_block = !in_code_block

		// We just finished parsing a code block
		if !in_code_block {
			// `line_offset - 1` is the newline that terminates the last content
			// line. For an empty block that lies before `char_start`, so clamp
			// to avoid slicing with lo > hi.
			text_end := max(current_block.char_start, line_offset - 1)
			// Do not leak the '\r' of a "\r\n" terminator into the text.
			if text_end > current_block.char_start && s[text_end - 1] == '\r' {
				text_end -= 1
			}

			current_block.line_end = line_index
			current_block.char_end = text_end
			current_block.text = s[current_block.char_start:text_end]
			append(&blocks, current_block)
			current_block = {}
			continue
		}

		// It looks like we're starting a code block, but there's no language specified
		// That means we shouldn't try to extract it
		if len(trimmed_line) == len(FENCE) {
			in_code_block = false
			continue
		}

		current_block.line_start = line_index
		current_block.char_start = next_line_offset

		remaining := strings.trim_space(trimmed_line[len(FENCE):])
		first_space := strings.index_rune(remaining, ' ')

		// There are no keys after the language name
		if first_space == -1 {
			current_block.language = remaining
			continue
		}
		current_block.language = remaining[:first_space]

		// The +1 is safe because we know there are characters after the space
		// (or else it would have been trimmed)
		tag_str := remaining[first_space + 1:]
		opening_curly := strings.index_rune(tag_str, '{')
		closing_curly := strings.last_index_byte(tag_str, '}')

		// Validate before anything else so that a stray '}' without a matching
		// '{' is rejected rather than mistaken for an empty tag list.
		// TODO: allow curlies if they're in strings
		if opening_curly == -1 || closing_curly == -1 || closing_curly < opening_curly {
			// TODO: improve error reporting
			return blocks, .Invalid_Tag
		}

		// Covers both `{}` and `{   }`
		tag_content_str := strings.trim_space(tag_str[opening_curly + 1:closing_curly])
		if tag_content_str == "" do continue

		tag_content_str_it := tag_content_str
		for raw_pair in strings.split_iterator(&tag_content_str_it, ",") {
			pair := strings.trim_space(raw_pair)

			// Split on the first '='; entries without one are ignored
			eq := strings.index_byte(pair, '=')
			if eq == -1 do continue

			current_block.tags[pair[:eq]] = pair[eq + 1:]
		}
	}

	// TODO: improve error reporting
	if in_code_block {
		// The unfinished block was never appended to `blocks`, so nobody else
		// can free the tags it may already have collected.
		delete(current_block.tags)
		return blocks, .Incomplete_Code_Block
	}

	return blocks, .None
}

// Parses `example_1` and checks the language, text and tags of its three blocks.
@(test)
parse_blocks_correctly :: proc(t: ^tt.T) {
	blocks, err := parse_code_blocks(example_1)
	// Deferred statements run in reverse order: the tags are destroyed first,
	// then the array itself.
	defer delete(blocks)
	defer for cb in blocks do destroy_code_block(cb)

	tt.expect(t, err == .None, "parse_blocks_correctly should not error")
	tt.expect_value(t, len(blocks), 3)

	{
		b := blocks[0]
		tt.expect_value(t, b.language, "python")
		tt.expect_value(t, b.text, `print("Hello world")`)
	}

	{
		b := blocks[1]
		tt.expect_value(t, len(b.tags), 1)
		val := b.tags["Foo"] or_else tt.fail_now(t, "the second code block should have a `Foo` key")
		tt.expect_value(t, val, "bar")
	}

	{
		b := blocks[2]
		tt.expect_value(t, len(b.tags), 2)
		val := b.tags["Foo"] or_else tt.fail_now(t, "the third code block should have a `Foo` key")
		tt.expect_value(t, val, "bar")
		val = b.tags["Bazz"] or_else tt.fail_now(t, "the third code block should have a `Bazz` key")
		tt.expect_value(t, val, "quq")
	}
}