Симеон обнови решението на 03.12.2013 02:11 (преди над 4 години)
+package main
+
+import (
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// Regular-expression sources used by the MarkdownParser methods. They are kept
+// as plain strings and compiled by each method that needs them.
+var (
+ // A line made up solely of one or more '=' characters. The header methods
+ // treat the line directly above such a line as a level-1 (setext) header.
+ setext_h1_regex string = `^=+$`
+ // A line made up solely of one or more '-' characters. The header methods
+ // treat the line directly above such a line as a level-2 (setext) header.
+ setext_h2_regex string = `^\-+$`
+ // ATX level-1 header: a single '#', one whitespace character, then the
+ // title captured lazily in group 1. Group 2 swallows optional trailing
+ // whitespace and at most one closing '#'.
+ atx_h1_regex = `^#\s(.*?)(\s*?#?)$`
+ // ATX headers of levels 2-6: N '#' characters, one whitespace character,
+ // the title in group 1, optional whitespace and an optional closing run of
+ // the same N '#' characters. Because the character right after the opening
+ // run must be whitespace, a pattern for level N does not match a line that
+ // opens with more than N '#'.
+ atx_h2_regex = `^##\s(.*?)\s*?(##)?$`
+ atx_h3_regex = `^###\s(.*?)\s*?(###)?$`
+ atx_h4_regex = `^####\s(.*?)\s*?(####)?$`
+ atx_h5_regex = `^#####\s(.*?)\s*?(#####)?$`
+ atx_h6_regex = `^######\s(.*?)\s*?(######)?$`
+ // Two or more consecutive space-prefixed tokens, where each token is either
+ // a capitalised word (one upper-case Latin or Cyrillic letter followed by
+ // one or more lower-case Latin or Cyrillic letters) or a lone '-'.
+ // Every token, including the first, must be preceded by a space.
+ names_regex = `( [A-ZА-Я]{1}[a-zа-я]+| \-){2,}`
+ // An optional leading '+', '|', '(' or whitespace character ('|' is a
+ // literal inside the character class), optional whitespace, one digit, and
+ // then at least two more characters drawn from digits, whitespace,
+ // parentheses and '-'.
+ phones_regex = `[\+|\(|\s]?\s*[0-9]{1}[0-9\s\(\)\-]{2,}`
+ // Case-insensitive. Optional "scheme://", optional host with optional
+ // ":port", a mandatory '/', then optional path, "?query" and "#fragment"
+ // parts. Only the '/' is required, so a bare "/" also matches.
+ links_regex = `(?i)([a-z]+://)?([a-z0-9\-\._\~]+(:[0-9]+)?)?/([-a-z0-9/_\~]+)?(\?[a-z0-9=]+)?(#[a-z0-9=]+)?`
+ // Case-insensitive. A local part that starts with a letter or digit and
+ // continues with up to 200 characters from [a-z0-9_+.-], an '@', and one or
+ // more host characters from [a-z0-9-._~].
+ emails_regex = `(?i)[a-z0-9]{1}[a-z0-9_\+\.\-]{0,200}@[a-z0-9\-\._\~]+`
+)
+
+// MarkdownParser wraps the raw text of a Markdown document. Its methods split
+// Text on "\n" and scan the resulting lines with regular expressions.
+type MarkdownParser struct {
+ // Text is the document source that the methods scan.
+ Text string
+}
+
+// NewMarkdownParser allocates a MarkdownParser whose Text field is set to text
+// and returns a pointer to it.
+func NewMarkdownParser(text string) *MarkdownParser {
+	return &MarkdownParser{Text: text}
+}
+
+// Headers returns the titles of all level-1 headers in the document, in the
+// order they appear.
+//
+// Two notations are recognised:
+//   - setext: a line matching setext_h1_regex (only '=' characters); the title
+//     is the line directly above it;
+//   - ATX: a line matching atx_h1_regex; the title is capture group 1.
+//
+// The result is nil when the document contains no level-1 header.
+func (mp *MarkdownParser) Headers() []string {
+	var result []string
+	lines := strings.Split(mp.Text, "\n")
+	setextH1 := regexp.MustCompile(setext_h1_regex)
+	atxH1 := regexp.MustCompile(atx_h1_regex)
+
+	for i, line := range lines {
+		// An underline on the very first line has no title line above it.
+		// Without the i > 0 guard, lines[i-1] would index out of range and
+		// panic.
+		if i > 0 && setextH1.MatchString(line) {
+			result = append(result, lines[i-1])
+		}
+		// FindStringSubmatch returns nil when the line does not match, so a
+		// separate MatchString call is unnecessary.
+		if m := atxH1.FindStringSubmatch(line); m != nil {
+			result = append(result, m[1])
+		}
+	}
+
+	return result
+}
+
+// SubHeadersOf returns the titles of the level-2 headers that appear inside the
+// section opened by a level-1 header whose title is exactly equal to header.
+//
+// Every level-1 header line (setext underline or ATX form) re-evaluates whether
+// the scan is inside the requested section, so the section ends at the next
+// level-1 header with a different title. Level-2 headers are recognised in
+// both setext form (a line of '-' characters; the title is the line above) and
+// ATX form (capture group 1 of atx_h2_regex).
+//
+// The result is nil when nothing matches.
+func (mp *MarkdownParser) SubHeadersOf(header string) []string {
+	var result []string
+	lines := strings.Split(mp.Text, "\n")
+	setextH1 := regexp.MustCompile(setext_h1_regex)
+	atxH1 := regexp.MustCompile(atx_h1_regex)
+	setextH2 := regexp.MustCompile(setext_h2_regex)
+	atxH2 := regexp.MustCompile(atx_h2_regex)
+
+	inSection := false
+	for i, line := range lines {
+		// Setext underlines need a title line above them. On the first line
+		// there is none, and lines[i-1] would index out of range and panic.
+		hasPrev := i > 0
+
+		if hasPrev && setextH1.MatchString(line) {
+			inSection = lines[i-1] == header
+		}
+		if m := atxH1.FindStringSubmatch(line); m != nil {
+			inSection = m[1] == header
+		}
+
+		if !inSection {
+			continue
+		}
+
+		if hasPrev && setextH2.MatchString(line) {
+			result = append(result, lines[i-1])
+		}
+		if m := atxH2.FindStringSubmatch(line); m != nil {
+			result = append(result, m[1])
+		}
+	}
+
+	return result
+}
+
+// Names returns every substring of the document that matches names_regex, with
+// surrounding whitespace trimmed, in order of appearance.
+//
+// The text is scanned line by line, and all non-overlapping matches on a line
+// are reported, not only the first one. The result is nil when nothing matches.
+func (mp *MarkdownParser) Names() []string {
+	var result []string
+	namesRe := regexp.MustCompile(names_regex)
+
+	for _, line := range strings.Split(mp.Text, "\n") {
+		// FindAllString with n = -1 yields every non-overlapping match and
+		// nil when there is none, so no separate MatchString test is needed.
+		for _, match := range namesRe.FindAllString(line, -1) {
+			result = append(result, strings.TrimSpace(match))
+		}
+	}
+
+	return result
+}
+
+// PhoneNumbers returns every substring of the document that matches
+// phones_regex, with surrounding whitespace trimmed, in order of appearance.
+//
+// The text is scanned line by line, and all non-overlapping matches on a line
+// are reported, not only the first one. The result is nil when nothing matches.
+func (mp *MarkdownParser) PhoneNumbers() []string {
+	var result []string
+	phonesRe := regexp.MustCompile(phones_regex)
+
+	for _, line := range strings.Split(mp.Text, "\n") {
+		// The pattern may consume leading or trailing whitespace around the
+		// digits, hence the TrimSpace on each match.
+		for _, match := range phonesRe.FindAllString(line, -1) {
+			result = append(result, strings.TrimSpace(match))
+		}
+	}
+
+	return result
+}
+
+// Links returns every substring of the document that matches links_regex, with
+// surrounding whitespace trimmed, in order of appearance.
+//
+// The text is scanned line by line, and all non-overlapping matches on a line
+// are reported, not only the first one. The result is nil when nothing matches.
+func (mp *MarkdownParser) Links() []string {
+	var result []string
+	linksRe := regexp.MustCompile(links_regex)
+
+	for _, line := range strings.Split(mp.Text, "\n") {
+		// FindAllString with n = -1 yields every non-overlapping match and
+		// nil when there is none, so no separate MatchString test is needed.
+		for _, match := range linksRe.FindAllString(line, -1) {
+			result = append(result, strings.TrimSpace(match))
+		}
+	}
+
+	return result
+}
+
+// Emails returns every substring of the document that matches emails_regex,
+// with surrounding whitespace trimmed, in order of appearance.
+//
+// The text is scanned line by line, and all non-overlapping matches on a line
+// are reported, not only the first one. The result is nil when nothing matches.
+func (mp *MarkdownParser) Emails() []string {
+	var result []string
+	emailsRe := regexp.MustCompile(emails_regex)
+
+	for _, line := range strings.Split(mp.Text, "\n") {
+		// FindAllString with n = -1 yields every non-overlapping match and
+		// nil when there is none, so no separate MatchString test is needed.
+		for _, match := range emailsRe.FindAllString(line, -1) {
+			result = append(result, strings.TrimSpace(match))
+		}
+	}
+
+	return result
+}
+
+// GenerateTableOfContents builds a numbered outline of the document's headers
+// and returns it as a single string with one entry per line, joined by "\n".
+//
+// Each entry has the form "<n1>.<n2>...<nk> <title>", where k is the header
+// level (1-6) and n1..nk are the running counters of levels 1..k, for example
+// "1 Intro", "1.1 Setup", "1.1.1 Details".
+//
+// Levels 1 and 2 are recognised in both setext form (a line of '=' or '-'
+// characters; the title is the line above) and ATX form. Levels 3-6 are
+// recognised in ATX form only. A header whose title is empty is ignored.
+//
+// When a header of some level is emitted, the counters of all deeper levels
+// restart from zero, so the first "##" under the second "#" is numbered 2.1.
+// The result is the empty string when the document has no headers.
+func (mp *MarkdownParser) GenerateTableOfContents() string {
+	lines := strings.Split(mp.Text, "\n")
+
+	// Setext underlines exist only for the first two levels; index = level-1.
+	setext := []*regexp.Regexp{
+		regexp.MustCompile(setext_h1_regex),
+		regexp.MustCompile(setext_h2_regex),
+	}
+	// ATX patterns for levels 1-6; index = level-1.
+	atx := []*regexp.Regexp{
+		regexp.MustCompile(atx_h1_regex),
+		regexp.MustCompile(atx_h2_regex),
+		regexp.MustCompile(atx_h3_regex),
+		regexp.MustCompile(atx_h4_regex),
+		regexp.MustCompile(atx_h5_regex),
+		regexp.MustCompile(atx_h6_regex),
+	}
+
+	var counters [6]int
+	var entries []string
+
+	for i, line := range lines {
+		// Try the levels from shallowest to deepest and stop at the first
+		// one that yields a non-empty title.
+		for level, atxRe := range atx {
+			title := ""
+
+			// A setext underline on the first line has no title above it.
+			// The i > 0 guard keeps lines[i-1] in range.
+			if level < len(setext) && i > 0 && setext[level].MatchString(line) {
+				title = lines[i-1]
+			}
+			if m := atxRe.FindStringSubmatch(line); m != nil {
+				title = m[1]
+			}
+			if title == "" {
+				continue
+			}
+
+			counters[level]++
+			// Entering a new section at this level restarts the numbering
+			// of everything nested below it.
+			for deeper := level + 1; deeper < len(counters); deeper++ {
+				counters[deeper] = 0
+			}
+
+			numbers := make([]string, level+1)
+			for k := range numbers {
+				numbers[k] = strconv.Itoa(counters[k])
+			}
+			entries = append(entries, strings.Join(numbers, ".")+" "+title)
+			break
+		}
+	}
+
+	return strings.Join(entries, "\n")
+}
Най-сетне някой, който да се сети да експортне всички регекси отгоре на файла...
Само не ми харесва GenerateTableOfContents. Ако имахме условие да хващаме h16, един ред щеше да ти е по 900 символа ;)
И аз мисля, че може да се напише по-умно, но организацията на времето ми беше лоша. :) Надявам се това решение да върши работа.