// For more tutorials: https://blog.learngoprogramming.com
//
// Copyright © 2018 Inanc Gumus
// Learn Go Programming Course
// License: https://creativecommons.org/licenses/by-nc-sa/4.0/
//

package main

import (
	"fmt"
	"unicode/utf8"
	"unsafe"
)

// Please run this code and experiment with it
// Observe the results

// USELESS-NOTE : "Öykü" means "Story" in Turkish!

// main prints a series of small demonstrations that contrast three ways of
// measuring text: the numeric size of a codepoint (cptb), the in-memory size
// of a rune value (unsafe.Sizeof), and the UTF-8 encoded size
// (utf8.RuneLen / len of a string).
func main() {
	fmt.Println("-----------------------------------")
	fmt.Println("ASCII Codepoints")
	fmt.Println("-----------------------------------")

	var (
		a, z   byte = 'a', 'z'
		A, Z   byte = 'A', 'Z'
		d0, d9 byte = '0', '9'
	)

	for _, c := range []byte{a, z, A, Z, d0, d9} {
		// %[1]U and %[1]d re-use the first argument: the same value is
		// printed as a character, as U+XXXX, and as a decimal number.
		fmt.Printf("%c - 1 byte - %[1]U - %[1]d\n", c)
	}

	fmt.Println("\n-----------------------------------")
	fmt.Println("Unicode Codepoints")
	fmt.Println("-----------------------------------")

	var (
		Ö      = 'Ö'
		栗      = '栗'
		monkey = '🙉'
	)

	// cptb reports how many bytes the codepoint's numeric value occupies;
	// this is NOT the UTF-8 encoded length (see the next section).
	for _, c := range []rune{rune(A), Ö, 栗, monkey} {
		fmt.Printf("%c - %d bytes - %[1]U - %[1]d\n", c, cptb(c))
	}

	fmt.Println("\n-----------------------------------")
	fmt.Println("UTF-8 Encoded")
	fmt.Println("-----------------------------------")

	// utf8.RuneLen finds the number of bytes necessary for
	// encoding a codepoint to utf8
	for _, c := range []rune{rune(A), Ö, 栗, monkey} {
		fmt.Printf("%c - %d bytes - %[1]U - %[1]d\n", c, utf8.RuneLen(c))
	}

	fmt.Println("\n-----------------------------------")
	fmt.Println("Example: Unicode Codepoints")
	fmt.Println("-----------------------------------")

	var (
		ö = 'ö'
		y = 'y'
		k = 'k'
		ü = 'ü'
	)

	var (
		oykuRunes = []rune{ö, y, k, ü}
		total     int
	)

	for _, c := range oykuRunes {
		fmt.Printf("%c - %d bytes - %[1]U - %[1]d\n", c, cptb(c))

		// unsafe.Sizeof finds the memory size of simple values.
		// Here it measures the rune variable itself, so every element
		// adds the same amount regardless of which character it holds.
		// don't use it in production-level code -> it's unsafe!
		total += int(unsafe.Sizeof(c))
	}
	fmt.Printf("TOTAL: %d bytes.\n", total)

	fmt.Println("\n-----------------------------------")
	fmt.Println("Example: Indexing")
	fmt.Println("-----------------------------------")

	// Indexing a []rune yields whole codepoints.
	fmt.Printf("%c%c%c%c\n",
		oykuRunes[0], oykuRunes[1], oykuRunes[2],
		oykuRunes[len(oykuRunes)-1])

	// string to []rune
	oykuRunes = []rune("öykü")
	fmt.Printf("%c%c%c%c\n",
		oykuRunes[0], oykuRunes[1], oykuRunes[2],
		oykuRunes[len(oykuRunes)-1])

	fmt.Println("\n-----------------------------------")
	fmt.Println("Example: UTF-8 Encoding")
	fmt.Println("-----------------------------------")

	// this is also ok
	// oykuString := string(oykuRunes)

	oykuString := "öykü"

	fmt.Printf("TOTAL bytes in oykuRunes : %d\n", total)
	fmt.Printf("TOTAL bytes in oykuString: %d\n", len(oykuString))
	fmt.Printf("TOTAL runes in oykuString: %d\n",
		utf8.RuneCountInString(oykuString))

	fmt.Printf("Runes of oykuString : %s\n", oykuString)
	fmt.Printf("Bytes of oykuString : % x\n", oykuString)

	fmt.Println()
	// Indexing a string yields single bytes, not runes: the multi-byte
	// characters are deliberately printed one byte at a time here to show
	// that they do not survive byte-wise indexing.
	for i := 0; i < len(oykuString); i++ {
		fmt.Printf("oykuString[%d]: %c\n", i, oykuString[i])
	}

	// Slicing returns a value of the same type as the sliced operand:
	// slicing a string therefore returns a string.
	//
	// example:
	// oykuString[0:2] is a string
	fmt.Println()
	fmt.Printf("oykuString[0:2]: %q\n", oykuString[0:2])
	fmt.Printf("oykuString[4:6]: %q\n", oykuString[4:6])
}

// -------------------------------------------------------------------

// cptb finds how many bytes are necessary to represent a codepoint.
// cptb means "codepoint to bytes".
//
// It measures the codepoint's numeric value as a plain integer, not its
// UTF-8 encoding; use utf8.RuneLen for the encoded length.
//
// It returns 1 for values up to 0xFF, 2 up to 0xFFFF, 3 up to 0xFFFFFF,
// and 4 for anything larger.
func cptb(r rune) int {
	switch {
	case r <= 0xFF: // 255
		return 1
	case r <= 0xFFFF: // 65,535
		return 2
	case r <= 0xFFFFFF: // 16,777,215
		// The bound must be 0xFFFFFF (six hex digits): with 0xFFFFF,
		// codepoints U+100000..U+10FFFF were wrongly reported as 4 bytes.
		return 3
	}
	return 4
}