diff options
author | Gabriel A. Giovanini <mail@gabrielgio.me> | 2024-04-19 18:22:50 +0200 |
---|---|---|
committer | Gabriel A. Giovanini <mail@gabrielgio.me> | 2024-04-19 18:22:50 +0200 |
commit | 57c782546739fde08138b00e2d0b3ba5f18fb676 (patch) | |
tree | 8f8a46ba9715359ab500d52f7728f97b876466d8 /cmd/importer | |
parent | 1e36d1ba1ba9659ffd01e06e93ffee670f842ff8 (diff) | |
download | dict-57c782546739fde08138b00e2d0b3ba5f18fb676.tar.gz dict-57c782546739fde08138b00e2d0b3ba5f18fb676.tar.bz2 dict-57c782546739fde08138b00e2d0b3ba5f18fb676.zip |
ref: Better organize the files
Diffstat (limited to 'cmd/importer')
-rw-r--r-- | cmd/importer/importer.go | 131 |
1 files changed, 131 insertions, 0 deletions
diff --git a/cmd/importer/importer.go b/cmd/importer/importer.go new file mode 100644 index 0000000..18a7a7b --- /dev/null +++ b/cmd/importer/importer.go @@ -0,0 +1,131 @@ +package importer + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "math" + "os" + "strings" + + "github.com/urfave/cli/v2" + + "git.gabrielgio.me/dict/db" +) + +var ImportCommand = &cli.Command{ + Name: "import", + Usage: "convert dict.cc dictionary into a queryable sqlite format.", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "output", + Value: "main.dict", + Usage: "Dictionary database location", + }, + &cli.StringFlag{ + Name: "input", + Value: "dict.txt", + Usage: "Dict.cc txt dictionary file", + }, + }, + Action: func(cCtx *cli.Context) error { + input := cCtx.String("input") + output := cCtx.String("output") + return Import(context.Background(), input, output) + }, +} + +func Import(ctx context.Context, txtInput, sqliteOutput string) error { + db, err := db.Open(":memory:") + if err != nil { + return err + } + err = db.Migrate(ctx) + if err != nil { + return err + } + + file, err := os.Open(txtInput) + if err != nil { + return err + } + defer file.Close() + + count := 0 + total, err := lineCounter(file) + if err != nil { + return err + } + + _, err = file.Seek(0, 0) + if err != nil { + return err + } + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + if strings.HasPrefix(scanner.Text(), "#") || scanner.Text() == "" { + continue + } + + var ( + p = strings.SplitN(scanner.Text(), "\t", 2) + word = p[0] + line = strings.ReplaceAll(p[1], "\t", " ") + ) + + if err := db.InsertLine(ctx, word, line); err != nil { + return err + } + count++ + + if (count % 1234) == 0 { + fmt.Print("\033[G\033[K") // move the cursor left and clear the line + per := math.Ceil((float64(count) / float64(total)) * 100.0) + fmt.Printf("%d/%d (%.0f%%)", count, total, per) + } + } + + fmt.Printf("Consolidating") + err = db.Consolidade(ctx) + if err != nil { + return err + } + + err = db.Backup(ctx, sqliteOutput) + if err != nil { + return err + } + return nil +} + +func lineCounter(r io.Reader) (int, error) { + var count int + const lineBreak = '\n' + + buf := make([]byte, bufio.MaxScanTokenSize) + + for { + bufferSize, err := r.Read(buf) + if err != nil && err != io.EOF { + return 0, err + } + + var buffPosition int + for { + i := bytes.IndexByte(buf[buffPosition:], lineBreak) + if i == -1 || bufferSize == buffPosition { + break + } + buffPosition += i + 1 + count++ + } + if err == io.EOF { + break + } + } + + return count, nil +} |