Решение на Markdown от Недялко Андреев

Обратно към всички решения

Към профила на Недялко Андреев

Резултати

  • 9 точки от тестове
  • 0 бонус точки
  • 9 точки общо
  • 6 успешни тест(а)
  • 1 неуспешни тест(а)

Код

package main
import (
"regexp"
"strconv"
)
// MarkdownParser is a stateless parser implementation that's pretty
// unoptimized but somewhat flexible: it only keeps the raw text, and the
// query methods scan that text with regular expressions on every call.
// NOTE: there are probably some off-by-one errors; this has not been tested
// thoroughly.
type MarkdownParser struct {
// text is the complete input that all query methods search.
text string
}
// Element records where one pattern match sits inside the parser's text.
// All offsets are byte offsets into the full text. The field order matters
// because the struct can be built with a positional composite literal.
type Element struct {
	// ElementStart is the offset at which the whole match begins.
	ElementStart int
	// ElementEnd is the offset just past the end of the whole match.
	ElementEnd int
	// TextStart is the offset at which the first capture group begins.
	TextStart int
	// TextEnd is the offset just past the end of the first capture group.
	TextEnd int
	// MatchedPatterns is the pattern set that was searched to find this
	// element; it is reused to look for the next element of the same kind.
	MatchedPatterns []*regexp.Regexp
}
// headerRegexPatterns holds the compiled header patterns indexed by header
// level (1..6, index 0 is unused). They are compiled once at package
// initialisation instead of on every lookup; *regexp.Regexp is safe for
// concurrent use, so sharing them is fine.
var headerRegexPatterns = compileHeaderRegexPatterns()

// compileHeaderRegexPatterns builds the pattern sets for all six header levels.
func compileHeaderRegexPatterns() [7][]*regexp.Regexp {
	var byLevel [7][]*regexp.Regexp
	for level := 1; level <= 6; level++ {
		// ATX style: exactly `level` leading '#', optional padding, the title
		// (first capture group), optional closing '#'s. The title must not
		// start with '#' (that would be a deeper level) nor with a newline
		// (an empty "#" line must not swallow the following line). The header
		// may be terminated by newlines or by the end of the text.
		patterns := []*regexp.Regexp{
			regexp.MustCompile(`(?m)^#{` + strconv.Itoa(level) + `}[ \t]*([^#\n].*?)[ \t]*#*(?:\n+|\z)`),
		}

		// Setext style (underlined titles) only exists for levels 1 and 2.
		// The title capture is lazy so trailing whitespace is left to the
		// following [ \t]* instead of ending up inside the title.
		switch level {
		case 1:
			patterns = append(patterns, regexp.MustCompile(`(?m)^(.+?)[ \t]*\n=+[ \t]*(?:\n+|\z)`))
		case 2:
			patterns = append(patterns, regexp.MustCompile(`(?m)^(.+?)[ \t]*\n-+[ \t]*(?:\n+|\z)`))
		}

		// Cap the capacity so an append by a caller cannot write into the
		// shared backing array.
		byLevel[level] = patterns[:len(patterns):len(patterns)]
	}
	return byLevel
}

// getHeaderRegexPatterns returns the patterns that recognise a header of the
// given level. Every pattern has exactly one capture group holding the header
// title. Levels 1 and 2 yield two patterns (ATX and Setext), levels 3 to 6
// yield only the ATX pattern.
//
// It panics when level is outside 1..6. The returned slice is shared between
// calls and must be treated as read-only.
func getHeaderRegexPatterns(level int) []*regexp.Regexp {
	if level < 1 || level > 6 {
		panic("OMG")
	}
	return headerRegexPatterns[level]
}
// getText returns the part of the parser's text between the byte offsets from
// (inclusive) and to (exclusive). A negative to, or one past the end of the
// text, means "up to the end of the text". An empty string is returned when
// from lies beyond the effective end. from itself is not checked for being
// negative.
func (mp *MarkdownParser) getText(from, to int) string {
	end := to
	if end < 0 || end > len(mp.text) {
		end = len(mp.text)
	}
	if from <= end {
		return mp.text[from:end]
	}
	return ""
}
// getFirstElement searches the text window [from, to) with every pattern and
// reports the match that starts earliest; when two patterns match at the same
// offset the one listed first wins. The returned Element carries offsets
// relative to the whole text (the window offset is added back) and remembers
// the pattern set that was used.
//
// The boolean is false when no pattern matches. It panics if a pattern does
// not have exactly one capture group.
func (mp *MarkdownParser) getFirstElement(patterns []*regexp.Regexp, from, to int) (bool, Element) {
	var best []int
	for _, pattern := range patterns {
		if pattern.NumSubexp() != 1 {
			panic("Invalid regex was supplied")
		}

		loc := pattern.FindStringSubmatchIndex(mp.getText(from, to))
		switch {
		case len(best) == 0:
			// Nothing found so far: take whatever this pattern produced
			// (possibly still no match).
			best = loc
		case len(loc) > 0 && loc[0] < best[0]:
			best = loc
		}
	}

	if len(best) == 0 {
		return false, Element{}
	}

	return true, Element{
		ElementStart:    from + best[0],
		ElementEnd:      from + best[1],
		TextStart:       from + best[2],
		TextEnd:         from + best[3],
		MatchedPatterns: patterns,
	}
}
// findFirstElementByContent walks the elements matched by patterns inside
// [from, to) in order and returns the first one whose captured text equals
// content exactly. The boolean is false when no such element exists.
func (mp *MarkdownParser) findFirstElementByContent(patterns []*regexp.Regexp, content string, from, to int) (bool, Element) {
	for cursor := from; ; {
		ok, candidate := mp.getFirstElement(patterns, cursor, to)
		if !ok {
			return false, Element{}
		}
		if mp.getText(candidate.TextStart, candidate.TextEnd) == content {
			return true, candidate
		}
		// Continue the search right after the element that did not match.
		cursor = candidate.ElementEnd
	}
}
// getElementsText collects, in order of appearance, the captured text of
// every element that patterns match inside [from, to). It returns nil when
// nothing matches.
func (mp *MarkdownParser) getElementsText(patterns []*regexp.Regexp, from, to int) []string {
	var texts []string

	cursor := from
	for {
		ok, el := mp.getFirstElement(patterns, cursor, to)
		if !ok {
			return texts
		}
		texts = append(texts, mp.getText(el.TextStart, el.TextEnd))
		// The next search starts where this element ended.
		cursor = el.ElementEnd
	}
}
// getMatchedText compiles pattern, finds all of its non-overlapping matches in
// the parser's text and returns, for each match, the text of the capture
// group with index matchPosition (0 is the whole match).
//
// It returns nil when there are no matches or when matchPosition does not
// name a group of the pattern. It panics if pattern is not a valid regular
// expression.
func (mp *MarkdownParser) getMatchedText(pattern string, matchPosition int) []string {
	re := regexp.MustCompile(pattern)

	// Every submatch slice has exactly NumSubexp()+1 entries, so the index can
	// be validated once up front. The previous per-match check used ">=" and
	// let matchPosition == len(match) through, which indexed out of range.
	if matchPosition < 0 || matchPosition > re.NumSubexp() {
		return nil
	}

	var result []string
	for _, match := range re.FindAllStringSubmatch(mp.text, -1) {
		result = append(result, match[matchPosition])
	}
	return result
}
// getChildrenOf returns every element matched by patterns that lies after el
// and before the next element of el's own kind (the next match of
// el.MatchedPatterns). When there is no such following element, or when el is
// the zero Element (ElementEnd == 0), the search runs to the end of the text.
//
// NOTE(review): the window is limited only by the next match of
// el.MatchedPatterns; boundaries of el's ancestors are not taken into account.
func (mp *MarkdownParser) getChildrenOf(el Element, patterns []*regexp.Regexp) []Element {
	// limit == -1 means "no upper bound" for getFirstElement/getText.
	limit := -1
	if el.ElementEnd > 0 {
		if ok, sibling := mp.getFirstElement(el.MatchedPatterns, el.ElementEnd, -1); ok {
			limit = sibling.ElementStart
		}
	}

	var children []Element
	cursor := el.ElementEnd
	for {
		ok, child := mp.getFirstElement(patterns, cursor, limit)
		if !ok {
			return children
		}
		children = append(children, child)
		cursor = child.ElementEnd
	}
}
// getContentsOf renders the table-of-contents lines for the headers of the
// given level found under el, followed recursively by the lines of their own
// sub-headers down to level 6. Each line has the form "<prefix>.<n> <title>\n"
// where n is the 1-based position of the header among its siblings.
func (mp *MarkdownParser) getContentsOf(el Element, prefix string, level int) string {
	toc := ""
	children := mp.getChildrenOf(el, getHeaderRegexPatterns(level))
	for idx := range children {
		child := children[idx]
		number := prefix + "." + strconv.Itoa(idx+1)
		toc += number + " " + mp.getText(child.TextStart, child.TextEnd) + "\n"

		// Level 6 is the deepest header level, so there is nothing below it.
		if level >= 6 {
			continue
		}
		toc += mp.getContentsOf(child, number, level+1)
	}
	return toc
}
// The required methods:
// NewMarkdownParser returns a parser that answers queries about text.
func NewMarkdownParser(text string) *MarkdownParser {
	return &MarkdownParser{text: text}
}
// Headers returns the titles of all level 1 headers in the text, in order of
// appearance.
func (mp *MarkdownParser) Headers() []string {
	h1Patterns := getHeaderRegexPatterns(1)
	return mp.getElementsText(h1Patterns, 0, -1)
}
// SubHeadersOf returns the titles of the level 2 headers that follow the
// first level 1 header whose title equals header, up to the next level 1
// header. It returns nil when there is no such level 1 header or when it has
// no level 2 headers.
func (mp *MarkdownParser) SubHeadersOf(header string) []string {
	ok, parent := mp.findFirstElementByContent(getHeaderRegexPatterns(1), header, 0, -1)
	if !ok {
		return nil
	}

	var titles []string
	children := mp.getChildrenOf(parent, getHeaderRegexPatterns(2))
	for i := range children {
		titles = append(titles, mp.getText(children[i].TextStart, children[i].TextEnd))
	}
	return titles
}
// Names returns the sequences of two or more capitalised words (an uppercase
// letter followed by lowercase letters, joined by spaces or hyphens) found in
// the text. The character before the sequence, ignoring spaces and tabs, must
// not be a period, a quote or whitespace, so a sequence directly at the start
// of a sentence or line is not reported.
func (mp *MarkdownParser) Names() []string {
	const (
		namePattern = `[^\.\n"'\s][ \t]*(\p{Lu}\p{Ll}*([ -]+\p{Lu}\p{Ll}*)+)`
		nameGroup   = 1
	)
	return mp.getMatchedText(namePattern, nameGroup)
}
// PhoneNumbers returns the phone-number-like sequences found in the text: an
// optional leading '+' or '(', then digits mixed with parentheses, hyphens
// and spaces. The sequence must be preceded by a character that is not a
// digit, letter, underscore or hyphen.
//
// A closing parenthesis directly after the last digit is part of the number
// (e.g. "+45-(31)"); previously the match had to end on a digit, so it was
// cut off. A trailing ')' that has no matching '(' inside the number is
// treated as surrounding punctuation and dropped.
func (mp *MarkdownParser) PhoneNumbers() []string {
	numbers := mp.getMatchedText(`[^\d\pL\w-]([+\(]?\d[\d\(\)\- ]*\d\)?)`, 1)

	for i, number := range numbers {
		opened, closed := 0, 0
		for _, r := range number {
			switch r {
			case '(':
				opened++
			case ')':
				closed++
			}
		}
		// More ')' than '(' and the number ends with ')': the final
		// parenthesis belongs to the surrounding text, not to the number.
		if closed > opened && number[len(number)-1] == ')' {
			numbers[i] = number[:len(number)-1]
		}
	}
	return numbers
}
// Links returns every http:// or https:// URL in the text; a URL extends up
// to the next whitespace character.
func (mp *MarkdownParser) Links() []string {
	const linkPattern = `(https?:\/\/[^\s]+)`
	return mp.getMatchedText(linkPattern, 1)
}
// Emails returns the e-mail addresses found in the text. An address must
// start with an ASCII letter or digit, may have up to 200 further local-part
// characters (letters, digits, '_', '+', '.', '-'), and must end in a domain
// whose last label is 2 to 6 ASCII letters.
//
// An address is recognised at the very start of the text or after
// whitespace. The previous pattern used `[\s\n^]`, where '^' inside the
// character class is a literal caret rather than an anchor, so an address at
// the start of the text was never found.
func (mp *MarkdownParser) Emails() []string {
	return mp.getMatchedText(`(?:^|\s)([a-zA-Z0-9][a-zA-Z0-9_\+\.\-]{0,200}@(?:[a-zA-Z0-9]+(?:\-*[a-zA-Z0-9])*\.)+[a-zA-Z]{2,6})`, 1)
}
// GenerateTableOfContents renders a numbered outline of all headers in the
// text. Level 1 headers are written as "<n>. <title>\n"; deeper headers are
// produced by getContentsOf as "<n>.<m>... <title>\n" directly below their
// parent.
func (mp *MarkdownParser) GenerateTableOfContents() string {
	// The top level is handled separately from getContentsOf because only the
	// level 1 numbers end with a "." in the table of contents.
	toc := ""
	topLevel := mp.getChildrenOf(Element{}, getHeaderRegexPatterns(1))
	for idx := range topLevel {
		h1 := topLevel[idx]
		number := strconv.Itoa(idx + 1)
		toc += number + ". " + mp.getText(h1.TextStart, h1.TextEnd) + "\n"
		toc += mp.getContentsOf(h1, number, 2)
	}
	return toc
}

Лог от изпълнението

PASS
ok  	_/tmp/d20140106-32701-16kaw2f	0.049s
PASS
ok  	_/tmp/d20140106-32701-16kaw2f	0.012s
PASS
ok  	_/tmp/d20140106-32701-16kaw2f	0.012s
--- FAIL: TestPhoneNumbers (0.00 seconds)
	solution_test.go:86: Not equal:
		  []string{"0889123456", "0 (889) 123", "456", "+45-(31"}
		  []string{"0889123456", "0 (889) 123", "456", "+45-(31)"}
FAIL
exit status 1
FAIL	_/tmp/d20140106-32701-16kaw2f	0.013s
PASS
ok  	_/tmp/d20140106-32701-16kaw2f	0.012s
PASS
ok  	_/tmp/d20140106-32701-16kaw2f	0.013s
PASS
ok  	_/tmp/d20140106-32701-16kaw2f	0.013s

История (2 версии и 1 коментар)

Недялко обнови решението на 03.12.2013 04:49 (преди над 4 години)

+package main
+
+import (
+ //"fmt"
+ //"io/ioutil"
+ "regexp"
+ "strconv"
+)
+
+// A stateless parser implementation that's pretty unoptimized but somewhat flexible
+//NOTE: there probably are some of off-by-one errors, too sleepy to test though...
+
+type MarkdownParser struct {
+ text string
+}
+
+type Element struct {
+ ElementStart, ElementEnd int
+ TextStart, TextEnd int
+ MatchedPatterns []*regexp.Regexp
+}
+
+func getHeaderRegexPatterns(level int) []*regexp.Regexp {
+ //TODO: directly cache the compiled regexes: it's a bit wasteful to compute the them every time...
+ if level < 1 || level > 6 {
+ panic("OMG")
+ }
+
+ patterns := make([]*regexp.Regexp, 2)
+ patterns[0] = regexp.MustCompile(`(?m)^#{` + strconv.Itoa(level) + `}[ \t]*([^#].*?)[ \t]*#*\n+`)
+ if level > 2 {
+ return patterns[:1]
+ }
+
+ if level == 1 {
+ patterns[1] = regexp.MustCompile(`(?m)^(.+)[ \t]*\n=+[ \t]*\n+`)
+ } else if level == 2 {
+ patterns[1] = regexp.MustCompile(`(?m)^(.+)[ \t]*\n-+[ \t]*\n+`)
+ }
+
+ return patterns
+}
+
+func (mp *MarkdownParser) getText(from, to int) string {
+ if to < 0 || to > len(mp.text) {
+ to = len(mp.text)
+ }
+ if from > to {
+ return ""
+ }
+
+ return mp.text[from:to]
+}
+
+func (mp *MarkdownParser) getFirstElement(patterns []*regexp.Regexp, from, to int) (bool, Element) {
+ var minResult, currentResult []int
+ for _, re := range patterns {
+ if re.NumSubexp() != 1 {
+ panic("Invalid regex was supplied")
+ }
+
+ currentResult = re.FindStringSubmatchIndex(mp.getText(from, to))
+
+ if len(minResult) == 0 || (len(currentResult) > 0 && (currentResult[0] < minResult[0])) {
+ minResult = currentResult
+ }
+ }
+
+ if len(minResult) == 0 {
+ return false, Element{}
+ }
+
+ return true, Element{minResult[0] + from, minResult[1] + from, minResult[2] + from, minResult[3] + from, patterns}
+}
+
+func (mp *MarkdownParser) findFirstElementByContent(patterns []*regexp.Regexp, content string, from, to int) (bool, Element) {
+ var currentFrom int = from
+
+ for {
+ found, elPos := mp.getFirstElement(patterns, currentFrom, to)
+ if !found {
+ return false, Element{}
+ }
+
+ if mp.getText(elPos.TextStart, elPos.TextEnd) == content {
+ return true, elPos
+ }
+
+ currentFrom = elPos.ElementEnd
+ }
+}
+
+func (mp *MarkdownParser) getElementsText(patterns []*regexp.Regexp, from, to int) []string {
+ var result []string = nil
+ var currentFrom int = from
+
+ for {
+ found, elPos := mp.getFirstElement(patterns, currentFrom, to)
+ if !found {
+ break
+ }
+
+ result = append(result, mp.getText(elPos.TextStart, elPos.TextEnd))
+ currentFrom = elPos.ElementEnd
+ }
+
+ return result
+}
+
+func (mp *MarkdownParser) getMatchedText(pattern string, matchPosition int) []string {
+ var result []string = nil
+
+ re := regexp.MustCompile(pattern)
+
+ for _, match := range re.FindAllStringSubmatch(mp.text, -1) {
+ if len(match) >= matchPosition {
+ result = append(result, match[matchPosition])
+ }
+ }
+
+ return result
+}
+
+func (mp *MarkdownParser) getChildrenOf(el Element, patterns []*regexp.Regexp) []Element {
+ var result []Element
+ var currentFrom int = el.ElementEnd
+ var currentElementEnd int = -1
+
+ if el.ElementEnd > 0 {
+ foundNextSibling, nextSiblingPos := mp.getFirstElement(el.MatchedPatterns, el.ElementEnd, -1)
+ if foundNextSibling {
+ currentElementEnd = nextSiblingPos.ElementStart
+ }
+ }
+
+ for {
+ found, elPos := mp.getFirstElement(patterns, currentFrom, currentElementEnd)
+ if !found {
+ break
+ }
+
+ result = append(result, elPos)
+ currentFrom = elPos.ElementEnd
+ }
+
+ //fmt.Printf("mdp.getChildrenOf(): %v\n\n\n", result)
+
+ return result
+}
+
+func (mp *MarkdownParser) getContentsOf(el Element, prefix string, level int) string {
+ result := "" //TODO: use a string builder
+ for i, subEl := range mp.getChildrenOf(el, getHeaderRegexPatterns(level)) {
+
+ newPrefix := prefix + "." + strconv.Itoa(i+1)
+ result += newPrefix + " " + mp.getText(subEl.TextStart, subEl.TextEnd) + "\n"
+
+ if level < 6 {
+ subresult := mp.getContentsOf(subEl, newPrefix, level+1)
+ if subresult != "" {
+ result += subresult
+ }
+ }
+ }
+ return result
+}
+
+// The required methods:
+
+func NewMarkdownParser(text string) *MarkdownParser {
+ result := new(MarkdownParser)
+ result.text = text
+ return result
+}
+
+func (mp *MarkdownParser) Headers() []string {
+ return mp.getElementsText(getHeaderRegexPatterns(1), 0, -1)
+}
+
+func (mp *MarkdownParser) SubHeadersOf(header string) []string {
+ var result []string
+
+ found, h1 := mp.findFirstElementByContent(getHeaderRegexPatterns(1), header, 0, -1)
+ if !found {
+ return nil
+ }
+
+ for _, h2 := range mp.getChildrenOf(h1, getHeaderRegexPatterns(2)) {
+ result = append(result, mp.getText(h2.TextStart, h2.TextEnd))
+ }
+
+ return result
+}
+
+func (mp *MarkdownParser) Names() []string {
+ return mp.getMatchedText(`[^\.\n"'\s][ \t]*(\p{Lu}\p{Ll}*([ -]+\p{Lu}\p{Ll}*)+)`, 1)
+}
+
+func (mp *MarkdownParser) PhoneNumbers() []string {
+ return mp.getMatchedText(`[^\d\pL\w-]([+\(]?\d[\d\(\)\- ]*\d)`, 1)
+}
+
+func (mp *MarkdownParser) Links() []string {
+ return mp.getMatchedText(`(https?:\/\/[^\s]+)`, 1)
+}
+func (mp *MarkdownParser) Emails() []string {
+ return mp.getMatchedText(`[\s\n^]([a-zA-Z0-9][a-zA-Z0-9_\+\.\-]{0,200}@(?:[a-zA-Z0-9]+(?:\-*[a-zA-Z0-9])*\.)+[a-zA-Z]{2,6})`, 1)
+}
+
+func (mp *MarkdownParser) GenerateTableOfContents() string {
+ //RANT: code duplication is neccasary because of the stupid rule that only H1 numbers should end with "." in the TOC...
+ result := "" //TODO: use a string builder
+ for i, subEl := range mp.getChildrenOf(Element{}, getHeaderRegexPatterns(1)) {
+
+ prefix := strconv.Itoa(i + 1)
+ result += prefix + ". " + mp.getText(subEl.TextStart, subEl.TextEnd) + "\n"
+ subresult := mp.getContentsOf(subEl, prefix, 2)
+ if subresult != "" {
+ result += subresult
+ }
+ }
+ return result
+}
+
+/*
+func main() {
+ content, err := ioutil.ReadFile("./README.md")
+ if err != nil {
+ return
+ }
+
+ mdp := NewMarkdownParser(string(content))
+
+ //fmt.Printf("mdp.Headers(): %v\n\n\n", mdp.Headers())
+ //fmt.Printf("mdp.SubHeadersOf('MarkdownParser'): %v\n\n\n", mdp.SubHeadersOf("MarkdownParser"))
+ //fmt.Println(mdp.GenerateTableOfContents())
+
+ //fmt.Printf("mdp.Names(): %#v\n\n\n", mdp.Names())
+ //fmt.Printf("mdp.PhoneNumbers(): %#v\n\n\n", mdp.PhoneNumbers())
+ //fmt.Printf("mdp.PhoneNumbers(): %#v\n\n\n", mdp.PhoneNumbers())
+ //fmt.Printf("mdp.Links(): %#v\n\n\n", mdp.Links())
+ fmt.Printf("mdp.Emails(): %#v\n\n\n", mdp.Emails())
+
+ return
+}
+*/

Недялко обнови решението на 03.12.2013 04:50 (преди над 4 години)

package main
import (
- //"fmt"
- //"io/ioutil"
"regexp"
"strconv"
)
// A stateless parser implementation that's pretty unoptimized but somewhat flexible
//NOTE: there probably are some of off-by-one errors, too sleepy to test though...
type MarkdownParser struct {
text string
}
type Element struct {
ElementStart, ElementEnd int
TextStart, TextEnd int
MatchedPatterns []*regexp.Regexp
}
func getHeaderRegexPatterns(level int) []*regexp.Regexp {
//TODO: directly cache the compiled regexes: it's a bit wasteful to compute the them every time...
if level < 1 || level > 6 {
panic("OMG")
}
patterns := make([]*regexp.Regexp, 2)
patterns[0] = regexp.MustCompile(`(?m)^#{` + strconv.Itoa(level) + `}[ \t]*([^#].*?)[ \t]*#*\n+`)
if level > 2 {
return patterns[:1]
}
if level == 1 {
patterns[1] = regexp.MustCompile(`(?m)^(.+)[ \t]*\n=+[ \t]*\n+`)
} else if level == 2 {
patterns[1] = regexp.MustCompile(`(?m)^(.+)[ \t]*\n-+[ \t]*\n+`)
}
return patterns
}
func (mp *MarkdownParser) getText(from, to int) string {
if to < 0 || to > len(mp.text) {
to = len(mp.text)
}
if from > to {
return ""
}
return mp.text[from:to]
}
func (mp *MarkdownParser) getFirstElement(patterns []*regexp.Regexp, from, to int) (bool, Element) {
var minResult, currentResult []int
for _, re := range patterns {
if re.NumSubexp() != 1 {
panic("Invalid regex was supplied")
}
currentResult = re.FindStringSubmatchIndex(mp.getText(from, to))
if len(minResult) == 0 || (len(currentResult) > 0 && (currentResult[0] < minResult[0])) {
minResult = currentResult
}
}
if len(minResult) == 0 {
return false, Element{}
}
return true, Element{minResult[0] + from, minResult[1] + from, minResult[2] + from, minResult[3] + from, patterns}
}
func (mp *MarkdownParser) findFirstElementByContent(patterns []*regexp.Regexp, content string, from, to int) (bool, Element) {
var currentFrom int = from
for {
found, elPos := mp.getFirstElement(patterns, currentFrom, to)
if !found {
return false, Element{}
}
if mp.getText(elPos.TextStart, elPos.TextEnd) == content {
return true, elPos
}
currentFrom = elPos.ElementEnd
}
}
func (mp *MarkdownParser) getElementsText(patterns []*regexp.Regexp, from, to int) []string {
var result []string = nil
var currentFrom int = from
for {
found, elPos := mp.getFirstElement(patterns, currentFrom, to)
if !found {
break
}
result = append(result, mp.getText(elPos.TextStart, elPos.TextEnd))
currentFrom = elPos.ElementEnd
}
return result
}
func (mp *MarkdownParser) getMatchedText(pattern string, matchPosition int) []string {
var result []string = nil
re := regexp.MustCompile(pattern)
for _, match := range re.FindAllStringSubmatch(mp.text, -1) {
if len(match) >= matchPosition {
result = append(result, match[matchPosition])
}
}
return result
}
func (mp *MarkdownParser) getChildrenOf(el Element, patterns []*regexp.Regexp) []Element {
var result []Element
var currentFrom int = el.ElementEnd
var currentElementEnd int = -1
if el.ElementEnd > 0 {
foundNextSibling, nextSiblingPos := mp.getFirstElement(el.MatchedPatterns, el.ElementEnd, -1)
if foundNextSibling {
currentElementEnd = nextSiblingPos.ElementStart
}
}
for {
found, elPos := mp.getFirstElement(patterns, currentFrom, currentElementEnd)
if !found {
break
}
result = append(result, elPos)
currentFrom = elPos.ElementEnd
}
//fmt.Printf("mdp.getChildrenOf(): %v\n\n\n", result)
return result
}
func (mp *MarkdownParser) getContentsOf(el Element, prefix string, level int) string {
result := "" //TODO: use a string builder
for i, subEl := range mp.getChildrenOf(el, getHeaderRegexPatterns(level)) {
newPrefix := prefix + "." + strconv.Itoa(i+1)
result += newPrefix + " " + mp.getText(subEl.TextStart, subEl.TextEnd) + "\n"
if level < 6 {
subresult := mp.getContentsOf(subEl, newPrefix, level+1)
if subresult != "" {
result += subresult
}
}
}
return result
}
// The required methods:
func NewMarkdownParser(text string) *MarkdownParser {
result := new(MarkdownParser)
result.text = text
return result
}
func (mp *MarkdownParser) Headers() []string {
return mp.getElementsText(getHeaderRegexPatterns(1), 0, -1)
}
func (mp *MarkdownParser) SubHeadersOf(header string) []string {
var result []string
found, h1 := mp.findFirstElementByContent(getHeaderRegexPatterns(1), header, 0, -1)
if !found {
return nil
}
for _, h2 := range mp.getChildrenOf(h1, getHeaderRegexPatterns(2)) {
result = append(result, mp.getText(h2.TextStart, h2.TextEnd))
}
return result
}
func (mp *MarkdownParser) Names() []string {
return mp.getMatchedText(`[^\.\n"'\s][ \t]*(\p{Lu}\p{Ll}*([ -]+\p{Lu}\p{Ll}*)+)`, 1)
}
func (mp *MarkdownParser) PhoneNumbers() []string {
return mp.getMatchedText(`[^\d\pL\w-]([+\(]?\d[\d\(\)\- ]*\d)`, 1)
}
func (mp *MarkdownParser) Links() []string {
return mp.getMatchedText(`(https?:\/\/[^\s]+)`, 1)
}
func (mp *MarkdownParser) Emails() []string {
return mp.getMatchedText(`[\s\n^]([a-zA-Z0-9][a-zA-Z0-9_\+\.\-]{0,200}@(?:[a-zA-Z0-9]+(?:\-*[a-zA-Z0-9])*\.)+[a-zA-Z]{2,6})`, 1)
}
func (mp *MarkdownParser) GenerateTableOfContents() string {
//RANT: code duplication is neccasary because of the stupid rule that only H1 numbers should end with "." in the TOC...
result := "" //TODO: use a string builder
for i, subEl := range mp.getChildrenOf(Element{}, getHeaderRegexPatterns(1)) {
prefix := strconv.Itoa(i + 1)
result += prefix + ". " + mp.getText(subEl.TextStart, subEl.TextEnd) + "\n"
subresult := mp.getContentsOf(subEl, prefix, 2)
if subresult != "" {
result += subresult
}
}
return result
}
-
-/*
-func main() {
- content, err := ioutil.ReadFile("./README.md")
- if err != nil {
- return
- }
-
- mdp := NewMarkdownParser(string(content))
-
- //fmt.Printf("mdp.Headers(): %v\n\n\n", mdp.Headers())
- //fmt.Printf("mdp.SubHeadersOf('MarkdownParser'): %v\n\n\n", mdp.SubHeadersOf("MarkdownParser"))
- //fmt.Println(mdp.GenerateTableOfContents())
-
- //fmt.Printf("mdp.Names(): %#v\n\n\n", mdp.Names())
- //fmt.Printf("mdp.PhoneNumbers(): %#v\n\n\n", mdp.PhoneNumbers())
- //fmt.Printf("mdp.PhoneNumbers(): %#v\n\n\n", mdp.PhoneNumbers())
- //fmt.Printf("mdp.Links(): %#v\n\n\n", mdp.Links())
- fmt.Printf("mdp.Emails(): %#v\n\n\n", mdp.Emails())
-
- return
-}
-*/