You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

93 lines
2.6 KiB

package monday
import (
	"strings"
	"unicode"
	"unicode/utf8"
)

// dateStringLayoutItem represents one homogeneous run of characters in a date
// string: a run of letters, a run of digits, or a run of anything else
// (delimiters such as spaces and punctuation).
// This is an abstraction level above the raw character string of a date
// representation.
//
// Example: "1 February / 2013" ->
//
//	dateStringLayoutItem { item: "1", isWord: false, isDigit: true }
//	dateStringLayoutItem { item: " ", isWord: false, isDigit: false }
//	dateStringLayoutItem { item: "February", isWord: true, isDigit: false }
//	dateStringLayoutItem { item: " / ", isWord: false, isDigit: false }
//	dateStringLayoutItem { item: "2013", isWord: false, isDigit: true }
type dateStringLayoutItem struct {
	item    string // the run itself, byte-for-byte as it appears in the source string
	isWord  bool   // true if the run starts with (and therefore consists of) letters
	isDigit bool   // true if the run starts with (and therefore consists of) digits
}

// extractLetterSequence extracts the first homogeneous run of characters from
// originalStr starting at byte offset index and wraps it in a
// dateStringLayoutItem.
//
// The first rune decides the kind of the run (letter, digit, or neither); the
// run extends until a rune of a different kind is met or the string ends.
// The returned item is a substring of originalStr, so len(item) is always the
// exact number of bytes consumed, even when the input holds invalid UTF-8.
//
// If index == len(originalStr) the zero item is returned. An index outside
// [0, len(originalStr)] panics, as any out-of-range slice expression does.
func extractLetterSequence(originalStr string, index int) (it dateStringLayoutItem) {
	rest := originalStr[index:]

	end := 0 // byte length of the run found so far
	for end < len(rest) {
		r, size := utf8.DecodeRuneInString(rest[end:])
		if end == 0 {
			// The first rune classifies the whole run.
			it.isWord = unicode.IsLetter(r)
			it.isDigit = unicode.IsDigit(r)
		} else if unicode.IsLetter(r) != it.isWord || unicode.IsDigit(r) != it.isDigit {
			// The rune belongs to a different class than the first one, so
			// the run ends just before it. A rune is never both a letter and
			// a digit, so comparing both flags covers every transition
			// between the three classes.
			break
		}
		end += size
	}

	it.item = rest[:end]
	return it
}

// stringToLayoutItems transforms a raw date string (like "2 Mar 2012") into
// a sequence of dateStringLayoutItems, which are more convenient to work with
// in other analysis modules. Concatenating the items of the result yields
// dateStr again. An empty dateStr yields a nil slice.
func stringToLayoutItems(dateStr string) (seqs []dateStringLayoutItem) {
	i := 0
	for i < len(dateStr) {
		seq := extractLetterSequence(dateStr, i)
		if len(seq.item) == 0 {
			// Defensive: a non-empty remainder always yields at least one
			// rune, but never risk looping forever without progress.
			break
		}
		i += len(seq.item)
		seqs = append(seqs, seq)
	}
	return seqs
}

// layoutToString concatenates the items of li back into a single string.
func layoutToString(li []dateStringLayoutItem) string {
	// This function is expensive enough to be worth counting
	// bytes and allocating all in one go.
	numChars := 0
	for _, v := range li {
		numChars += len(v.item)
	}

	var sb strings.Builder
	sb.Grow(numChars)
	for _, v := range li {
		sb.WriteString(v.item)
	}
	return sb.String()
}