Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
219 lines
5.4 KiB
219 lines
5.4 KiB
// Copyright (c) 2015 Couchbase, Inc. |
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file |
|
// except in compliance with the License. You may obtain a copy of the License at |
|
// http://www.apache.org/licenses/LICENSE-2.0 |
|
// Unless required by applicable law or agreed to in writing, software distributed under the |
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, |
|
// either express or implied. See the License for the specific language governing permissions |
|
// and limitations under the License. |
|
|
|
// +build ignore |
|
|
|
package main |
|
|
|
import ( |
|
"bufio" |
|
"bytes" |
|
"flag" |
|
"fmt" |
|
"io" |
|
"log" |
|
"net/http" |
|
"os" |
|
"os/exec" |
|
"strconv" |
|
"strings" |
|
"unicode" |
|
) |
|
|
|
// Command-line flags.
var (
	// url is the directory the Unicode test data files are downloaded from.
	url = flag.String("url",
		"http://www.unicode.org/Public/"+unicode.Version+"/ucd/auxiliary/",
		"URL of Unicode database directory")

	// verbose is registered as -verbose.
	verbose = flag.Bool("verbose",
		false,
		"write data to stdout as it is parsed")

	// localFiles makes openReader read the data files from disk instead of
	// fetching them over HTTP.
	localFiles = flag.Bool("local",
		false,
		"data files have been copied to the current directory; for debugging only")

	// outputFile names the file that receives the generated tables; when it
	// is empty the tables are written to standard output.
	outputFile = flag.String("output",
		"",
		"output file for generated tables; default stdout")
)

// output is the buffered destination for all generated code. It is assigned
// by setupOutput.
var output *bufio.Writer
|
|
|
func main() { |
|
flag.Parse() |
|
setupOutput() |
|
|
|
graphemeTests := make([]test, 0) |
|
graphemeComments := make([]string, 0) |
|
graphemeTests, graphemeComments = loadUnicodeData("GraphemeBreakTest.txt", graphemeTests, graphemeComments) |
|
wordTests := make([]test, 0) |
|
wordComments := make([]string, 0) |
|
wordTests, wordComments = loadUnicodeData("WordBreakTest.txt", wordTests, wordComments) |
|
sentenceTests := make([]test, 0) |
|
sentenceComments := make([]string, 0) |
|
sentenceTests, sentenceComments = loadUnicodeData("SentenceBreakTest.txt", sentenceTests, sentenceComments) |
|
|
|
fmt.Fprintf(output, fileHeader, *url) |
|
generateTestTables("Grapheme", graphemeTests, graphemeComments) |
|
generateTestTables("Word", wordTests, wordComments) |
|
generateTestTables("Sentence", sentenceTests, sentenceComments) |
|
|
|
flushOutput() |
|
} |
|
|
|
// WordBreakProperty.txt has the form: |
|
// 05F0..05F2 ; Hebrew_Letter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD |
|
// FB1D ; Hebrew_Letter # Lo HEBREW LETTER YOD WITH HIRIQ |
|
func openReader(file string) (input io.ReadCloser) { |
|
if *localFiles { |
|
f, err := os.Open(file) |
|
if err != nil { |
|
log.Fatal(err) |
|
} |
|
input = f |
|
} else { |
|
path := *url + file |
|
resp, err := http.Get(path) |
|
if err != nil { |
|
log.Fatal(err) |
|
} |
|
if resp.StatusCode != 200 { |
|
log.Fatal("bad GET status for "+file, resp.Status) |
|
} |
|
input = resp.Body |
|
} |
|
return |
|
} |
|
|
|
func loadUnicodeData(filename string, tests []test, comments []string) ([]test, []string) { |
|
f := openReader(filename) |
|
defer f.Close() |
|
bufioReader := bufio.NewReader(f) |
|
line, err := bufioReader.ReadString('\n') |
|
for err == nil { |
|
tests, comments = parseLine(line, tests, comments) |
|
line, err = bufioReader.ReadString('\n') |
|
} |
|
// if the err was EOF still need to process last value |
|
if err == io.EOF { |
|
tests, comments = parseLine(line, tests, comments) |
|
} |
|
return tests, comments |
|
} |
|
|
|
// Delimiters recognised by parseLine in the break-test data files.
const (
	comment = "#" // starts a comment that runs to the end of the line
	brk     = "÷" // separates the expected segments of a test line
	nbrk    = "×" // separates code points that belong to the same segment
)

// test is the expected segmentation of one data line: each element holds the
// UTF-8 encoding of one segment.
type test [][]byte

// parseLine parses one line of a break-test file and returns tests and
// comments with the parsed entry appended.
//
// Blank lines and lines whose first non-space character is the comment marker
// are skipped. On a data line, the text after the first comment marker becomes
// the entry's comment (empty when the line has none). The remainder is split
// on brk into segments, and each segment is split on nbrk into hexadecimal
// code points that are encoded as UTF-8. If a code point fails to parse, the
// problem is logged and the line is dropped, leaving tests and comments
// unchanged.
func parseLine(line string, tests []test, comments []string) ([]test, []string) {
	line = strings.TrimSpace(line)
	if line == "" || strings.HasPrefix(line, comment) {
		return tests, comments
	}

	// Detach the trailing comment, if there is one. Without this guard a line
	// lacking a comment marker would record the whole data line as its comment.
	note := ""
	if i := strings.Index(line, comment); i >= 0 {
		note = strings.TrimSpace(line[i+len(comment):])
		line = line[:i]
	}

	t := make(test, 0)
	for _, piece := range strings.Split(line, brk) {
		piece = strings.TrimSpace(piece)
		if piece == "" {
			continue
		}
		var word strings.Builder
		for _, codePoint := range strings.Split(piece, nbrk) {
			codePoint = strings.TrimSpace(codePoint)
			// 32 bits is enough for any rune and rejects values that a wider
			// parse would later truncate.
			r, err := strconv.ParseInt(codePoint, 16, 32)
			if err != nil {
				log.Printf("err: %v for '%s'", err, codePoint)
				return tests, comments
			}
			word.WriteRune(rune(r))
		}
		t = append(t, []byte(word.String()))
	}

	return append(tests, t), append(comments, note)
}
|
|
|
func generateTestTables(prefix string, tests []test, comments []string) { |
|
fmt.Fprintf(output, testHeader, prefix) |
|
for i, t := range tests { |
|
fmt.Fprintf(output, "\t\t{\n") |
|
fmt.Fprintf(output, "\t\t\tinput: %#v,\n", bytes.Join(t, []byte{})) |
|
fmt.Fprintf(output, "\t\t\toutput: %s,\n", generateTest(t)) |
|
fmt.Fprintf(output, "\t\t\tcomment: `%s`,\n", comments[i]) |
|
fmt.Fprintf(output, "\t\t},\n") |
|
} |
|
fmt.Fprintf(output, "}\n") |
|
} |
|
|
|
func generateTest(t test) string { |
|
rv := "[][]byte{" |
|
for _, te := range t { |
|
rv += fmt.Sprintf("%#v,", te) |
|
} |
|
rv += "}" |
|
return rv |
|
} |
|
|
|
// fileHeader is the format string for the top of the generated file. Its
// single %s verb is filled with the data URL.
const fileHeader = "// Generated by running\n" +
	"// maketesttables --url=%s\n" +
	"// DO NOT EDIT\n" +
	"\n" +
	"package segment\n"
|
|
|
// testHeader is the format string that opens one generated test table. Its
// single %s verb is filled with the table prefix, giving a variable named
// unicode<prefix>Tests.
const testHeader = "var unicode%sTests = []struct {\n" +
	"\tinput []byte\n" +
	"\toutput [][]byte\n" +
	"\tcomment string\n" +
	"}{\n"
|
|
|
func setupOutput() { |
|
output = bufio.NewWriter(startGofmt()) |
|
} |
|
|
|
// startGofmt connects output to a gofmt process if -output is set. |
|
func startGofmt() io.Writer { |
|
if *outputFile == "" { |
|
return os.Stdout |
|
} |
|
stdout, err := os.Create(*outputFile) |
|
if err != nil { |
|
log.Fatal(err) |
|
} |
|
// Pipe output to gofmt. |
|
gofmt := exec.Command("gofmt") |
|
fd, err := gofmt.StdinPipe() |
|
if err != nil { |
|
log.Fatal(err) |
|
} |
|
gofmt.Stdout = stdout |
|
gofmt.Stderr = os.Stderr |
|
err = gofmt.Start() |
|
if err != nil { |
|
log.Fatal(err) |
|
} |
|
return fd |
|
} |
|
|
|
func flushOutput() { |
|
err := output.Flush() |
|
if err != nil { |
|
log.Fatal(err) |
|
} |
|
}
|
|
|