Мартин обнови решението на 01.12.2013 20:45 (преди над 4 години)
+package main
+
+import (
+ "reflect"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
// MarkdownParser wraps a single Markdown document and is the receiver for
// the extraction methods defined in this file (headers, names, phone
// numbers, links, e-mail addresses and the table of contents).
type MarkdownParser struct {
	// text is the document the methods scan. NewMarkdownParser stores it
	// with every carriage return removed.
	text string
}
+
// arraySearch returns the index of the first element of haystack that is
// equal (==) to needle, or -1 when no element matches.
//
// haystack is walked through reflection, so it may be a slice, an array or a
// string (a string is compared byte by byte). Any other value, including a
// nil haystack, yields -1 instead of panicking inside reflect.
//
// A needle whose dynamic type does not support == (for example a slice, map
// or func) is reported as not found, because comparing it against an element
// of the same type would panic at run time.
func arraySearch(needle interface{}, haystack interface{}) int {
	items := reflect.ValueOf(haystack)
	switch items.Kind() {
	case reflect.Slice, reflect.Array, reflect.String:
		// Len and Index are both defined for these kinds.
	default:
		return -1
	}
	if needle != nil && !reflect.TypeOf(needle).Comparable() {
		return -1
	}
	for i := 0; i < items.Len(); i++ {
		if items.Index(i).Interface() == needle {
			return i
		}
	}
	return -1
}
+
+func NewMarkdownParser(text string) *MarkdownParser {
+ mp := new(MarkdownParser)
+ // Lets remove all \r
+ re := regexp.MustCompile(`\r`)
+ text = re.ReplaceAllLiteralString(text, "")
+
+ mp.text = text
+ return mp
+}
+
// matchHeaders is the regular expression source for a first-level header
// together with the newline(s) that follow it. Two spellings are accepted:
//
//   - underlined: a non-empty line followed by a line of '=' characters
//     (whole form in group 2, title in group 3);
//   - hash-prefixed: "# " followed by the title and optional closing '#'s
//     (whole form in group 4, title in group 5).
//
// Group 1 spans whichever alternative matched. The trailing `\n+` means a
// header is only recognised when at least one newline follows it.
const matchHeaders = `(?m)^(` +
	`((.+)[ \t]*\n[ \t]*=+)` + // groups 2 and 3: underlined form
	`|` +
	`(# [ \t]*(.+?)[ \t]*#*[ \t]*)` + // groups 4 and 5: hash-prefixed form
	`)\n+`
+
+func (mp *MarkdownParser) Headers() []string {
+ var headers []string
+ re := regexp.MustCompile(matchHeaders)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ if len(re) > 5 && re[5] != "" {
+ headers = append(headers, re[5])
+ } else if len(re) > 3 && re[3] != "" {
+ headers = append(headers, re[3])
+ }
+ }
+ return headers
+}
+
+func (mp *MarkdownParser) SubHeadersOf(header string) []string {
+ var subHeaders []string
+ headers := mp.Headers()
+ re := regexp.MustCompile(matchHeaders)
+ parts := re.Split(mp.text, -1)
+ index := arraySearch(header, headers)
+ if index != -1 {
+ // Kind of want to skip checking if content is before the header,
+ // or after the next header, which will be complicated in regex
+ content := parts[index+1]
+ re = regexp.MustCompile(`(?m)^(` + // Begin group 1
+ `(` + // Begin group 2
+ `(.+)` + // Group 3
+ `[ \t]*\n[ \t]*-+` +
+ `)` + // End group 2
+ `|(` + // Begin group 4
+ `## [ \t]*(.+?)` + // Group 5
+ `[ \t]*#*[ \t]*` +
+ `)` + // End group 4
+ `)\n+`)
+ matches := re.FindAllStringSubmatch(content, -1)
+ for _, re := range matches {
+ if len(re) > 5 && re[5] != "" {
+ subHeaders = append(subHeaders, re[5])
+ } else if len(re) > 3 && re[3] != "" {
+ subHeaders = append(subHeaders, re[3])
+ }
+ }
+ }
+ return subHeaders
+}
+
+func (mp *MarkdownParser) Names() []string {
+ var names []string
+ re := regexp.MustCompile(`\p{L} +(((\p{Lu}\p{Ll}*)( |( *- *)))+(\p{Lu}\p{Ll}*))`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ if len(re) > 1 && re[1] != "" {
+ names = append(names, re[1])
+ }
+ }
+
+ return names
+}
+
+func (mp *MarkdownParser) PhoneNumbers() []string {
+ var phones []string
+ re := regexp.MustCompile(`(^|[\t\n\v\f\r ])(((\+ *)|(\( *))?[0-9]+([ \-()]*[0-9]+)*)([\t\n\v\f\r ]|$)`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ if len(re) > 2 && re[2] != "" {
+ phones = append(phones, re[2])
+ }
+ }
+ return phones
+}
+
+func (mp *MarkdownParser) Links() []string {
+ var links []string
+ re := regexp.MustCompile(`\w+://([a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)+)[-._A-Za-z0-9~:/?#[\]@!$&'()*+,;=]*`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ count := 0
+L:
+ for _, re := range matches {
+ if len(re[1]) > 255 {
+ continue
+ }
+ parts := strings.Split(".", re[1])
+ for _, re := range parts {
+ if len(re) > 63 {
+ continue L
+ }
+ }
+ links = append(links, re[0])
+ count++
+ }
+
+ return links
+}
+
+func (mp *MarkdownParser) Emails() []string {
+ var emails []string
+ re := regexp.MustCompile(`(?m)(^|[\t\n\v\f\r ])([a-zA-Z0-9][-._+a-zA-Z0-9]*@[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)+[A-Za-z0-9-._~:/?#[\]@!$&'()*+,;=]*)([\t\n\v\f\r ]|$)`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ emails = append(emails, re[2])
+ }
+ return emails
+}
+
+func (mp *MarkdownParser) GenerateTableOfContents() string {
+ content := ""
+ lastLevel := 0
+ count := make([]int, 6)
+ re := regexp.MustCompile(`(?m)^(((.+?)[ \t]*\n[ \t]*(=|-)+\n)|((#{1,6}) [ \t]*(.+)#*[ \t]*))`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ prefix := ""
+ var curLevel int
+ var header string
+ if len(re) == 8 && re[7] != "" {
+ curLevel = len(re[6]) - 1
+ header = re[7]
+ } else {
+ if re[4] == "-" {
+ curLevel = 1
+ } else { // it's =
+ curLevel = 0
+ }
+ header = re[3]
+ }
+ if curLevel < lastLevel {
+ for i := curLevel + 1; i < len(count); i++ {
+ count[i] = 0
+ }
+ }
+ lastLevel = curLevel
+ count[curLevel]++
+ for _, c := range count {
+ if c == 0 {
+ break
+ } else {
+ if len(prefix) > 0 {
+ prefix += "." + strconv.Itoa(c)
+ } else {
+ prefix = strconv.Itoa(c)
+ }
+ }
+ }
+
+ if curLevel == 0 {
+ prefix += "."
+ }
+
+ content += prefix + " " + header + "\n"
+ }
+ return content
+}
+
// main does nothing. It is present because a package named main needs an
// entry point in order to build.
func main() {}