Решение на Markdown от Мартин Ангелов

Обратно към всички решения

Към профила на Мартин Ангелов

Резултати

  • 6 точки от тестове
  • 0 бонус точки
  • 6 точки общо
  • 4 успешни тест(а)
  • 3 неуспешни тест(а)

Код

package main
import (
"reflect"
"regexp"
"strconv"
"strings"
)
type MarkdownParser struct {
text string
}
func arraySearch(needle interface{}, haystack interface{}) int {
refl := reflect.ValueOf(haystack)
for i := 0; i < refl.Len(); i++ {
if refl.Index(i).Interface() == needle {
return i
}
}
return -1
}
func NewMarkdownParser(text string) *MarkdownParser {
mp := new(MarkdownParser)
// Lets remove all \r
re := regexp.MustCompile(`\r`)
text = re.ReplaceAllLiteralString(text, "")
mp.text = text
return mp
}
const matchHeaders = `(?m)^(` + // Begin group 1
`(` + // Begin group 2
`(.+)` + // Group 3
`[ \t]*\n[ \t]*=+` +
`)` + // End group 2
`|(` + // Begin group 4
`# [ \t]*(.+?)` + // Group 5
`[ \t]*#*[ \t]*` +
`)` + // End group 4
`)\n+` // End group 1
func (mp *MarkdownParser) Headers() []string {
var headers []string
re := regexp.MustCompile(matchHeaders)
matches := re.FindAllStringSubmatch(mp.text, -1)
for _, re := range matches {
if len(re) > 5 && re[5] != "" {
headers = append(headers, re[5])
} else if len(re) > 3 && re[3] != "" {
headers = append(headers, re[3])
}
}
return headers
}
func (mp *MarkdownParser) SubHeadersOf(header string) []string {
var subHeaders []string
headers := mp.Headers()
re := regexp.MustCompile(matchHeaders)
parts := re.Split(mp.text, -1)
index := arraySearch(header, headers)
if index != -1 {
// Kind of want to skip checking if content is before the header,
// or after the next header, which will be complicated in regex
content := parts[index+1]
re = regexp.MustCompile(`(?m)^(` + // Begin group 1
`(` + // Begin group 2
`(.+)` + // Group 3
`[ \t]*\n[ \t]*-+` +
`)` + // End group 2
`|(` + // Begin group 4
`## [ \t]*(.+?)` + // Group 5
`[ \t]*#*[ \t]*` +
`)` + // End group 4
`)\n+`)
matches := re.FindAllStringSubmatch(content, -1)
for _, re := range matches {
if len(re) > 5 && re[5] != "" {
subHeaders = append(subHeaders, re[5])
} else if len(re) > 3 && re[3] != "" {
subHeaders = append(subHeaders, re[3])
}
}
}
return subHeaders
}
func (mp *MarkdownParser) Names() []string {
var names []string
re := regexp.MustCompile(`\p{L} +(((\p{Lu}\p{Ll}*)( |( *- *)))+(\p{Lu}\p{Ll}*))`)
matches := re.FindAllStringSubmatch(mp.text, -1)
for _, re := range matches {
if len(re) > 1 && re[1] != "" {
names = append(names, re[1])
}
}
return names
}
func (mp *MarkdownParser) PhoneNumbers() []string {
var phones []string
re := regexp.MustCompile(`(^|[\t\n\v\f\r ])(((\+ *)|(\( *))?[0-9]+([ \-()]*[0-9]+)*)([\t\n\v\f\r ]|$)`)
matches := re.FindAllStringSubmatch(mp.text, -1)
for _, re := range matches {
if len(re) > 2 && re[2] != "" {
phones = append(phones, re[2])
}
}
return phones
}
func (mp *MarkdownParser) Links() []string {
var links []string
re := regexp.MustCompile(`\w+://([a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)+)[-._A-Za-z0-9~:/?#[\]@!$&'()*+,;=]*`)
matches := re.FindAllStringSubmatch(mp.text, -1)
count := 0
L:
for _, re := range matches {
if len(re[1]) > 255 {
continue
}
parts := strings.Split(".", re[1])
for _, re := range parts {
if len(re) > 63 {
continue L
}
}
links = append(links, re[0])
count++
}
return links
}
func (mp *MarkdownParser) Emails() []string {
var emails []string
re := regexp.MustCompile(`(?m)(^|[\t\n\v\f\r ])([a-zA-Z0-9][-._+a-zA-Z0-9]*@[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)+[A-Za-z0-9-._~:/?#[\]@!$&'()*+,;=]*)([\t\n\v\f\r ]|$)`)
matches := re.FindAllStringSubmatch(mp.text, -1)
for _, re := range matches {
emails = append(emails, re[2])
}
return emails
}
func (mp *MarkdownParser) GenerateTableOfContents() string {
content := ""
lastLevel := 0
count := make([]int, 6)
re := regexp.MustCompile(`(?m)^(((.+?)[ \t]*\n[ \t]*(=|-)+\n)|((#{1,6}) [ \t]*(.+)#*[ \t]*))`)
matches := re.FindAllStringSubmatch(mp.text, -1)
for _, re := range matches {
prefix := ""
var curLevel int
var header string
if len(re) == 8 && re[7] != "" {
curLevel = len(re[6]) - 1
header = re[7]
} else {
if re[4] == "-" {
curLevel = 1
} else { // it's =
curLevel = 0
}
header = re[3]
}
if curLevel < lastLevel {
for i := curLevel + 1; i < len(count); i++ {
count[i] = 0
}
}
lastLevel = curLevel
count[curLevel]++
for _, c := range count {
if c == 0 {
break
} else {
if len(prefix) > 0 {
prefix += "." + strconv.Itoa(c)
} else {
prefix = strconv.Itoa(c)
}
}
}
if curLevel == 0 {
prefix += "."
}
content += prefix + " " + header + "\n"
}
return content
}
func main() {
}

Лог от изпълнението

PASS
ok  	_/tmp/d20140106-32701-1j6c5wj	0.012s
PASS
ok  	_/tmp/d20140106-32701-1j6c5wj	0.013s
PASS
ok  	_/tmp/d20140106-32701-1j6c5wj	0.012s
--- FAIL: TestPhoneNumbers (0.00 seconds)
	solution_test.go:86: Not equal:
		  []string{"0889123456", "0 (889) 123"}
		  []string{"0889123456", "0 (889) 123", "456", "+45-(31)"}
FAIL
exit status 1
FAIL	_/tmp/d20140106-32701-1j6c5wj	0.012s
--- FAIL: TestLinks (0.00 seconds)
	solution_test.go:98: Not equal:
		  []string{"http://somelink.com:230", "https://www.google.bg/search?q=4531&ie=utf-8&oe=utf-8&rls=org.mozilla:en-US:official&client="}
		  []string{"http://somelink.com:230", "https://www.google.bg/search?q=4531&ie=utf-8&oe=utf-8&rls=org.mozilla:en-US:official&client=%20firefox-a&gws_rd=asd&ei=some#somefragment"}
FAIL
exit status 1
FAIL	_/tmp/d20140106-32701-1j6c5wj	0.012s
--- FAIL: TestEmails (0.00 seconds)
	solution_test.go:107: Not equal:
		  []string{"validMail12@foobar.com", "toolongmailhereaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa@gmail.com"}
		  []string{"validMail12@foobar.com", "12mail@gmail.com"}
FAIL
exit status 1
FAIL	_/tmp/d20140106-32701-1j6c5wj	0.013s
PASS
ok  	_/tmp/d20140106-32701-1j6c5wj	0.012s

История (1 версия и 0 коментара)

Мартин обнови решението на 01.12.2013 20:45 (преди над 4 години)

+package main
+
+import (
+ "reflect"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+type MarkdownParser struct {
+ text string
+}
+
+func arraySearch(needle interface{}, haystack interface{}) int {
+ refl := reflect.ValueOf(haystack)
+ for i := 0; i < refl.Len(); i++ {
+ if refl.Index(i).Interface() == needle {
+ return i
+ }
+ }
+ return -1
+}
+
+func NewMarkdownParser(text string) *MarkdownParser {
+ mp := new(MarkdownParser)
+ // Lets remove all \r
+ re := regexp.MustCompile(`\r`)
+ text = re.ReplaceAllLiteralString(text, "")
+
+ mp.text = text
+ return mp
+}
+
+const matchHeaders = `(?m)^(` + // Begin group 1
+ `(` + // Begin group 2
+ `(.+)` + // Group 3
+ `[ \t]*\n[ \t]*=+` +
+ `)` + // End group 2
+ `|(` + // Begin group 4
+ `# [ \t]*(.+?)` + // Group 5
+ `[ \t]*#*[ \t]*` +
+ `)` + // End group 4
+ `)\n+` // End group 1
+
+func (mp *MarkdownParser) Headers() []string {
+ var headers []string
+ re := regexp.MustCompile(matchHeaders)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ if len(re) > 5 && re[5] != "" {
+ headers = append(headers, re[5])
+ } else if len(re) > 3 && re[3] != "" {
+ headers = append(headers, re[3])
+ }
+ }
+ return headers
+}
+
+func (mp *MarkdownParser) SubHeadersOf(header string) []string {
+ var subHeaders []string
+ headers := mp.Headers()
+ re := regexp.MustCompile(matchHeaders)
+ parts := re.Split(mp.text, -1)
+ index := arraySearch(header, headers)
+ if index != -1 {
+ // Kind of want to skip checking if content is before the header,
+ // or after the next header, which will be complicated in regex
+ content := parts[index+1]
+ re = regexp.MustCompile(`(?m)^(` + // Begin group 1
+ `(` + // Begin group 2
+ `(.+)` + // Group 3
+ `[ \t]*\n[ \t]*-+` +
+ `)` + // End group 2
+ `|(` + // Begin group 4
+ `## [ \t]*(.+?)` + // Group 5
+ `[ \t]*#*[ \t]*` +
+ `)` + // End group 4
+ `)\n+`)
+ matches := re.FindAllStringSubmatch(content, -1)
+ for _, re := range matches {
+ if len(re) > 5 && re[5] != "" {
+ subHeaders = append(subHeaders, re[5])
+ } else if len(re) > 3 && re[3] != "" {
+ subHeaders = append(subHeaders, re[3])
+ }
+ }
+ }
+ return subHeaders
+}
+
+func (mp *MarkdownParser) Names() []string {
+ var names []string
+ re := regexp.MustCompile(`\p{L} +(((\p{Lu}\p{Ll}*)( |( *- *)))+(\p{Lu}\p{Ll}*))`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ if len(re) > 1 && re[1] != "" {
+ names = append(names, re[1])
+ }
+ }
+
+ return names
+}
+
+func (mp *MarkdownParser) PhoneNumbers() []string {
+ var phones []string
+ re := regexp.MustCompile(`(^|[\t\n\v\f\r ])(((\+ *)|(\( *))?[0-9]+([ \-()]*[0-9]+)*)([\t\n\v\f\r ]|$)`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ if len(re) > 2 && re[2] != "" {
+ phones = append(phones, re[2])
+ }
+ }
+ return phones
+}
+
+func (mp *MarkdownParser) Links() []string {
+ var links []string
+ re := regexp.MustCompile(`\w+://([a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)+)[-._A-Za-z0-9~:/?#[\]@!$&'()*+,;=]*`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ count := 0
+L:
+ for _, re := range matches {
+ if len(re[1]) > 255 {
+ continue
+ }
+ parts := strings.Split(".", re[1])
+ for _, re := range parts {
+ if len(re) > 63 {
+ continue L
+ }
+ }
+ links = append(links, re[0])
+ count++
+ }
+
+ return links
+}
+
+func (mp *MarkdownParser) Emails() []string {
+ var emails []string
+ re := regexp.MustCompile(`(?m)(^|[\t\n\v\f\r ])([a-zA-Z0-9][-._+a-zA-Z0-9]*@[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)+[A-Za-z0-9-._~:/?#[\]@!$&'()*+,;=]*)([\t\n\v\f\r ]|$)`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ emails = append(emails, re[2])
+ }
+ return emails
+}
+
+func (mp *MarkdownParser) GenerateTableOfContents() string {
+ content := ""
+ lastLevel := 0
+ count := make([]int, 6)
+ re := regexp.MustCompile(`(?m)^(((.+?)[ \t]*\n[ \t]*(=|-)+\n)|((#{1,6}) [ \t]*(.+)#*[ \t]*))`)
+ matches := re.FindAllStringSubmatch(mp.text, -1)
+ for _, re := range matches {
+ prefix := ""
+ var curLevel int
+ var header string
+ if len(re) == 8 && re[7] != "" {
+ curLevel = len(re[6]) - 1
+ header = re[7]
+ } else {
+ if re[4] == "-" {
+ curLevel = 1
+ } else { // it's =
+ curLevel = 0
+ }
+ header = re[3]
+ }
+ if curLevel < lastLevel {
+ for i := curLevel + 1; i < len(count); i++ {
+ count[i] = 0
+ }
+ }
+ lastLevel = curLevel
+ count[curLevel]++
+ for _, c := range count {
+ if c == 0 {
+ break
+ } else {
+ if len(prefix) > 0 {
+ prefix += "." + strconv.Itoa(c)
+ } else {
+ prefix = strconv.Itoa(c)
+ }
+ }
+ }
+
+ if curLevel == 0 {
+ prefix += "."
+ }
+
+ content += prefix + " " + header + "\n"
+ }
+ return content
+}
+
+func main() {
+
+}