comma_tokenizer.go

package search

import "github.com/blevesearch/bleve/analysis"

// CommaTokenizer is a custom Bleve tokenizer that splits its input on commas.
type CommaTokenizer struct{}

// Tokenize walks the input once and emits one token per comma-separated segment.
func (ct *CommaTokenizer) Tokenize(sentence []byte) analysis.TokenStream {
	result := make(analysis.TokenStream, 0)
	pos := 1
	lastIdx := 0
	for i, length := 0, len(sentence); i < length; i++ {
		if sentence[i] == ',' {
			token := analysis.Token{
				Term:     sentence[lastIdx:i],
				Start:    lastIdx,
				End:      i,
				Position: pos,
				Type:     analysis.Ideographic,
				KeyWord:  true,
			}
			pos++
			lastIdx = i + 1 // resume after the comma, not on it
			result = append(result, &token)
		}
	}
	// Emit the trailing segment after the last comma, if any.
	if lastIdx < len(sentence) {
		result = append(result, &analysis.Token{
			Term:     sentence[lastIdx:],
			Start:    lastIdx,
			End:      len(sentence),
			Position: pos,
			Type:     analysis.Ideographic,
			KeyWord:  true,
		})
	}
	return result
}
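
A minimal usage sketch, for illustration only: it calls Tokenize directly on a small comma-separated input and prints each token's term, byte offsets, and position. The file name, test name, and sample input are assumptions, not part of the original snippet.

comma_tokenizer_test.go

package search

import (
	"fmt"
	"testing"
)

// TestCommaTokenizer prints the tokens produced for "red,green,blue".
func TestCommaTokenizer(t *testing.T) {
	ct := &CommaTokenizer{}
	for _, tok := range ct.Tokenize([]byte("red,green,blue")) {
		// Expected: red [0:3] pos=1, green [4:9] pos=2, blue [10:14] pos=3
		fmt.Printf("%s [%d:%d] pos=%d\n", tok.Term, tok.Start, tok.End, tok.Position)
	}
}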