aboutsummaryrefslogtreecommitdiff
path: root/cmd/importer
diff options
context:
space:
mode:
authorGabriel A. Giovanini <mail@gabrielgio.me>2024-04-19 18:22:50 +0200
committerGabriel A. Giovanini <mail@gabrielgio.me>2024-04-19 18:22:50 +0200
commit57c782546739fde08138b00e2d0b3ba5f18fb676 (patch)
tree8f8a46ba9715359ab500d52f7728f97b876466d8 /cmd/importer
parent1e36d1ba1ba9659ffd01e06e93ffee670f842ff8 (diff)
downloaddict-57c782546739fde08138b00e2d0b3ba5f18fb676.tar.gz
dict-57c782546739fde08138b00e2d0b3ba5f18fb676.tar.bz2
dict-57c782546739fde08138b00e2d0b3ba5f18fb676.zip
ref: Better organize the files
Diffstat (limited to 'cmd/importer')
-rw-r--r--cmd/importer/importer.go131
1 files changed, 131 insertions, 0 deletions
diff --git a/cmd/importer/importer.go b/cmd/importer/importer.go
new file mode 100644
index 0000000..18a7a7b
--- /dev/null
+++ b/cmd/importer/importer.go
@@ -0,0 +1,131 @@
+package importer
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "fmt"
+ "io"
+ "math"
+ "os"
+ "strings"
+
+ "github.com/urfave/cli/v2"
+
+ "git.gabrielgio.me/dict/db"
+)
+
+var ImportCommand = &cli.Command{
+ Name: "import",
+ Usage: "convert dict.cc dictionary into a queryable sqlite format.",
+ Flags: []cli.Flag{
+ &cli.StringFlag{
+ Name: "output",
+ Value: "main.dict",
+ Usage: "Dictionary database location",
+ },
+ &cli.StringFlag{
+ Name: "input",
+ Value: "dict.txt",
+ Usage: "Dict.cc txt dictionary file",
+ },
+ },
+ Action: func(cCtx *cli.Context) error {
+ input := cCtx.String("input")
+ output := cCtx.String("output")
+ return Import(context.Background(), input, output)
+ },
+}
+
+func Import(ctx context.Context, txtInput, sqliteOutput string) error {
+ db, err := db.Open(":memory:")
+ if err != nil {
+ return err
+ }
+ err = db.Migrate(ctx)
+ if err != nil {
+ return err
+ }
+
+ file, err := os.Open(txtInput)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ count := 0
+ total, err := lineCounter(file)
+ if err != nil {
+ return err
+ }
+
+ _, err = file.Seek(0, 0)
+ if err != nil {
+ return err
+ }
+
+ scanner := bufio.NewScanner(file)
+ for scanner.Scan() {
+ if strings.HasPrefix(scanner.Text(), "#") || scanner.Text() == "" {
+ continue
+ }
+
+ var (
+ p = strings.SplitN(scanner.Text(), "\t", 2)
+ word = p[0]
+ line = strings.ReplaceAll(p[1], "\t", " ")
+ )
+
+ if err := db.InsertLine(ctx, word, line); err != nil {
+ return err
+ }
+ count++
+
+ if (count % 1234) == 0 {
+ fmt.Print("\033[G\033[K") // move the cursor left and clear the line
+ per := math.Ceil((float64(count) / float64(total)) * 100.0)
+ fmt.Printf("%d/%d (%.0f%%)", count, total, per)
+ }
+ }
+
+ fmt.Printf("Consolidating")
+ err = db.Consolidade(ctx)
+ if err != nil {
+ return err
+ }
+
+ err = db.Backup(ctx, sqliteOutput)
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
// lineCounter reads r until io.EOF and returns the number of '\n' bytes it
// saw. A final line that is not terminated by a newline is not counted.
//
// Any read error other than io.EOF is returned together with a zero count.
func lineCounter(r io.Reader) (int, error) {
	var (
		count   int
		newline = []byte{'\n'}
		buf     = make([]byte, bufio.MaxScanTokenSize)
	)

	for {
		n, err := r.Read(buf)
		// Only the first n bytes belong to this read. Anything after them
		// is left over from an earlier, larger read and must not be
		// counted again.
		count += bytes.Count(buf[:n], newline)

		if err == io.EOF {
			return count, nil
		}
		if err != nil {
			return 0, err
		}
	}
}