how can I achieve faster mariadb inserts - sql

I am dealing with a bit over 15 billion rows of data in various text files. I am trying to insert them into MariaDB using golang. Golang is a fast language and is often used for big data but I cannot get more than 10k-15k inserts a second, at this rate its gonna take over 15 days, I need this data imported sooner than that. I have tried various batch sizes but they all give about the same results.
function I'm using to handle file data:
func handlePath(path string) {
file, err := os.Open(path)
if err != nil {
fmt.Printf("error opening %v: %v", path, err)
return
}
defer file.Close()
scanner := bufio.NewScanner(file)
var temp_lines []string
for scanner.Scan() {
if len(temp_lines) == line_batch {
insertRows(temp_lines)
temp_lines = []string{}
}
temp_lines = append(temp_lines, scanner.Text())
}
insertRows(temp_lines)
fmt.Printf("\nFormatted %v\n", path)
if err := scanner.Err(); err != nil {
fmt.Printf("\nScanner error %v\n", err)
return
}
}
function I'm using for inserting:
func insertRows(rows []string) {
var Args []string
for _, row := range rows {
line_split := strings.Split(row, "|")
if len(line_split) != 6 {return}
database_id := line_split[0]
email := line_split[1]
password := line_split[2]
username := line_split[3]
ip := line_split[4]
phone := line_split[5]
arg := fmt.Sprintf("('%v','%v','%v','%v','%v','%v')",database_id,email,password,username,ip,phone)
Args = append(Args, arg)
}
sqlQuery := fmt.Sprintf("INSERT INTO new_table (database_id, email, password, username, ip, phone_number) VALUES %s", strings.Join(Args, ","))
_, err := db.Exec(sqlQuery)
if err != nil {
//fmt.Printf("%v\n", err)
return
}
total+=line_batch
writes++
}
Server specs:
server

Related

How can i tell the PATCH Method which field i want to update

I'm working on a simple REST API and I'm having troubles with the PATCH method. I don't know how can i tell the method and the query which fields i want to update(for example which fields are passed as JSON) in the database. Here is what i have so far.
func PatchServer(c echo.Context) error {
patchedServer := new(structs.Server)
requestID := c.Param("id")
if err := c.Bind(patchedServer); err != nil {
return err
}
sql := "UPDATE servers SET server_name = CASE WHEN ? IS NOT NULL THEN ? END WHERE id = ?"
stmt, err := db.Get().Prepare(sql)
if err != nil {
panic(err)
}
_, err2 := stmt.Exec(patchedServer.Name, patchedServer.Name, requestID)
if err2 != nil {
panic(err2)
}
fmt.Println(patchedServer.ID, patchedServer.Name, patchedServer.Components)
fmt.Println("Requested id: ", requestID)
return c.JSON(http.StatusOK, "Patched!")
}

Bulk insert copy sql table with golang

For the context, I'm new to go and I'm creating a program that can copy tables from Oracle to MySQL.
I use database/sql go package, so I assume it can be used for migrating any kind of database.
To simplify my question I'm coping on the same MySQL database table name world.city to world.city_copy2.
with my following code, I ended up with the same last values in all the rows in the table :-(
do I somehow need to read through all the values inside the loop? what is the efficient way to do that?
package main
import (
"database/sql"
"fmt"
"strings"
_ "github.com/go-sql-driver/mysql"
)
const (
user = "user"
pass = "testPass"
server = "localhost"
)
func main() {
fmt.Print("test")
conStr := fmt.Sprintf("%s:%s#tcp(%s)/world", user, pass, server)
db, err := sql.Open("mysql", conStr)
if err != nil {
panic(err.Error())
}
defer db.Close()
err = db.Ping()
if err != nil {
panic(err.Error())
}
rows, err := db.Query("SELECT * FROM city")
if err != nil {
panic(err.Error()) // proper error handling instead of panic in your app
}
columns, err := rows.Columns()
if err != nil {
panic(err.Error()) // proper error handling instead of panic in your app
}
// Make a slice for the values
values := make([]sql.RawBytes, len(columns))
// rows.Scan wants '[]interface{}' as an argument, so we must copy the
// references into such a slice
scanArgs := make([]interface{}, len(values))
for i := range values {
scanArgs[i] = &values[i]
}
// that string will be generated according to len of columns
placeHolders := "( ?, ?, ?, ?, ? )"
// slice will contain all the values at the end
bulkValues := []interface{}{}
valueStrings := make([]string, 0)
for rows.Next() {
// get RawBytes from data
err = rows.Scan(scanArgs...)
if err != nil {
panic(err.Error()) // proper error handling instead of panic in your app
}
valueStrings = append(valueStrings, placeHolders)
bulkValues = append(bulkValues, scanArgs...)
//
}
stmStr := fmt.Sprintf("INSERT INTO city_copy2 VALUES %s", strings.Join(valueStrings, ","))
_, err = db.Exec(stmStr, bulkValues...)
if err != nil {
panic(err.Error())
}
}
I have checked out the docs of the library, and it seems that the problem here is that bulkValues keeps the address of the pointer so when scanArgs changes, bulkValues also changes to latest value of that scanArgs.
You need to use the values variable to get the values like below:
func main() {
fmt.Print("test")
conStr := fmt.Sprintf("%s:%s#tcp(%s)/soverflow", user, pass, server)
db, err := sql.Open("mysql", conStr)
if err != nil {
panic(err.Error())
}
defer db.Close()
err = db.Ping()
if err != nil {
panic(err.Error())
}
rows, err := db.Query("SELECT * FROM city")
if err != nil {
panic(err.Error()) // proper error handling instead of panic in your app
}
columns, err := rows.Columns()
if err != nil {
panic(err.Error()) // proper error handling instead of panic in your app
}
// Make a slice for the values
values := make([]sql.RawBytes, len(columns))
// rows.Scan wants '[]interface{}' as an argument, so we must copy the
// references into such a slice
scanArgs := make([]interface{}, len(values))
for i := range values {
scanArgs[i] = &values[i]
}
// that string will be generated according to len of columns
placeHolders := "( ?, ?, ?, ?, ? )"
// slice will contain all the values at the end
bulkValues := []interface{}{}
valueStrings := make([]string, 0)
// make an interface to keep the record's value
record := make([]interface{}, len(columns))
for rows.Next() {
// get RawBytes from data
err = rows.Scan(scanArgs...)
if err != nil {
panic(err.Error()) // proper error handling instead of panic in your app
}
valueStrings = append(valueStrings, placeHolders)
for i, col := range values {
// you need to be carefull with the datatypes here
// check out the docs for details on here
record[i] = string(value)
}
bulkValues = append(bulkValues, record...)
}
stmStr := fmt.Sprintf("INSERT INTO city_copy2 VALUES %s", strings.Join(valueStrings, ","))
_, err = db.Exec(stmStr, bulkValues...)
if err != nil {
panic(err.Error())
}
}
You can also find the example of the documentation here.
Note: There might be more efficient ways to copy database from psql to mysql but this answer only gives a quick solution for this particular issue that you are having.

golang sql pointer values keep repeating itself

Below is a golang function which is being called with an input channel
func getOptions(inChannel <-chan Param) <-chan ParamOptions {
paramOptions := make(chan ParamOptions )
go func() {
defer close(paramOptions )
var wg sync.WaitGroup
conn, err := sql.Open("mssql", wellConnStr)
if err != nil {
log.Fatal("open connection failed:", err.Error())
}
defer conn.Close()
getParamOptions := func(db *sql.DB, param *Param) {
defer wg.Done()
fmt.Println("querying options for ", param.Code, param.Name)
rows, err := db.Query(`select *
from dbo.ParamOptions where code=? and name=?`, &param.Code, &param.Name)
fmt.Println("results for ", param.Name, param.Code)
if err != nil {
log.Fatal("query failed:", err.Error())
}
defer rows.Close()
found := false
...
paramOptions <- ParamOptions...
break
}
if found == false {
fmt.Println("did not find options for ", param.Code, param.Name)
}
}
for paramInChannel := range paramChannel {
wg.Add(1)
fmt.Println("retrieving inputs for ", paramInChannel.Code, paramInChannel.Name)
**go** getParamOptions(conn, &wellInChannel)
}
wg.Wait()
}()
return paramOptions
}
If i remove the go keyword before calling the function getParamOptions it works without any problems. However if I use go then the last code and name keeps repeating within the the getParamOptions function, even though the options retrieved seems to be of the correct Param, the Code and Name values are being repeated

Golang ssh - how to run multiple commands on the same session?

I'm trying to run multiple commands through ssh but seems that Session.Run allows only one command per session ( unless I'm wrong). I'm wondering how can I bypass this limitation and reuse the session or send a sequence of commands.
The reason is that I need to run sudo su within the same session with the next command ( sh /usr/bin/myscript.sh )
Session.Shell allows for more than one command to be run, by passing your commands in via session.StdinPipe().
Be aware that using this approach will make your life more complicated; instead of having a one-shot function call that runs the command and collects the output once it's complete, you'll need to manage your input buffer (don't forget a \n at the end of a command), wait for output to actually come back from the SSH server, then deal with that output appropriately (if you had multiple commands in flight and want to know what output belongs to what input, you'll need to have a plan to figure that out).
stdinBuf, _ := session.StdinPipe()
err := session.Shell()
stdinBuf.Write([]byte("cd /\n"))
// The command has been sent to the device, but you haven't gotten output back yet.
// Not that you can't send more commands immediately.
stdinBuf.Write([]byte("ls\n"))
// Then you'll want to wait for the response, and watch the stdout buffer for output.
While for your specific problem, you can easily run sudo /path/to/script.sh, it shock me that there wasn't a simple way to run multiple commands on the same session, so I came up with a bit of a hack, YMMV:
func MuxShell(w io.Writer, r io.Reader) (chan<- string, <-chan string) {
in := make(chan string, 1)
out := make(chan string, 1)
var wg sync.WaitGroup
wg.Add(1) //for the shell itself
go func() {
for cmd := range in {
wg.Add(1)
w.Write([]byte(cmd + "\n"))
wg.Wait()
}
}()
go func() {
var (
buf [65 * 1024]byte
t int
)
for {
n, err := r.Read(buf[t:])
if err != nil {
close(in)
close(out)
return
}
t += n
if buf[t-2] == '$' { //assuming the $PS1 == 'sh-4.3$ '
out <- string(buf[:t])
t = 0
wg.Done()
}
}
}()
return in, out
}
func main() {
config := &ssh.ClientConfig{
User: "kf5",
Auth: []ssh.AuthMethod{
ssh.Password("kf5"),
},
}
client, err := ssh.Dial("tcp", "127.0.0.1:22", config)
if err != nil {
panic(err)
}
defer client.Close()
session, err := client.NewSession()
if err != nil {
log.Fatalf("unable to create session: %s", err)
}
defer session.Close()
modes := ssh.TerminalModes{
ssh.ECHO: 0, // disable echoing
ssh.TTY_OP_ISPEED: 14400, // input speed = 14.4kbaud
ssh.TTY_OP_OSPEED: 14400, // output speed = 14.4kbaud
}
if err := session.RequestPty("xterm", 80, 40, modes); err != nil {
log.Fatal(err)
}
w, err := session.StdinPipe()
if err != nil {
panic(err)
}
r, err := session.StdoutPipe()
if err != nil {
panic(err)
}
in, out := MuxShell(w, r)
if err := session.Start("/bin/sh"); err != nil {
log.Fatal(err)
}
<-out //ignore the shell output
in <- "ls -lhav"
fmt.Printf("ls output: %s\n", <-out)
in <- "whoami"
fmt.Printf("whoami: %s\n", <-out)
in <- "exit"
session.Wait()
}
If your shell prompt doesn't end with $ ($ followed by a space), this will deadlock, hence why it's a hack.
NewSession is a method of a connection. You don't need to create a new connection each time. A Session seems to be what this library calls a channel for the client, and many channels are multiplexed in a single connection. Hence:
func executeCmd(cmd []string, hostname string, config *ssh.ClientConfig) string {
conn, err := ssh.Dial("tcp", hostname+":8022", config)
if err != nil {
log.Fatal(err)
}
defer conn.Close()
var stdoutBuf bytes.Buffer
for _, command := range cmd {
session, err := conn.NewSession()
if err != nil {
log.Fatal(err)
}
defer session.Close()
session.Stdout = &stdoutBuf
session.Run(command)
}
return hostname + ": " + stdoutBuf.String()
}
So you open a new session(channel) and you run command within the existing ssh connection but with a new session(channel) each time.
You can use a small trick: sh -c 'cmd1&&cmd2&&cmd3&&cmd4&&etc..'
This is a single command, the actual commands are passed as argument to the shell which will execute them. This is how Docker handles multiple commands.
This works for me.
package main
import (
"fmt"
"golang.org/x/crypto/ssh"
// "io"
"log"
"os"
// Uncomment to store output in variable
//"bytes"
)
type MachineDetails struct {
username, password, hostname, port string
}
func main() {
h1 := MachineDetails{"root", "xxxxx", "x.x.x.x", "22"}
// Uncomment to store output in variable
//var b bytes.Buffer
//sess.Stdout = &b
//sess.Stderr = &b
commands := []string{
"pwd",
"whoami",
"echo 'bye'",
"exit",
}
connectHost(h1, commands)
// Uncomment to store in variable
//fmt.Println(b.String())
}
func connectHost(hostParams MachineDetails, commands []string) {
// SSH client config
config := &ssh.ClientConfig{
User: hostParams.username,
Auth: []ssh.AuthMethod{
ssh.Password(hostParams.password),
},
// Non-production only
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
}
// Connect to host
client, err := ssh.Dial("tcp", hostParams.hostname+":"+hostParams.port, config)
if err != nil {
log.Fatal(err)
}
defer client.Close()
// Create sesssion
sess, err := client.NewSession()
if err != nil {
log.Fatal("Failed to create session: ", err)
}
defer sess.Close()
// Enable system stdout
// Comment these if you uncomment to store in variable
sess.Stdout = os.Stdout
sess.Stderr = os.Stderr
// StdinPipe for commands
stdin, err := sess.StdinPipe()
if err != nil {
log.Fatal(err)
}
// Start remote shell
err = sess.Shell()
if err != nil {
log.Fatal(err)
}
// send the commands
for _, cmd := range commands {
_, err = fmt.Fprintf(stdin, "%s\n", cmd)
if err != nil {
log.Fatal(err)
}
}
// Wait for sess to finish
err = sess.Wait()
if err != nil {
log.Fatal(err)
}
// return sess, stdin, err
}
func createSession() {
}
Really liked OneOfOne's answer which inspired me with a more generalized solution to taken a variable that could match the tail of the read bytes and break the blocking read (also no need to fork two extra threads for blocking read and writes). The known limitation is (as in the original solution) if the matching string comes after 64 * 1024 bytes, then this code will spin forever.
package main
import (
"fmt"
"golang.org/x/crypto/ssh"
"io"
"log"
)
var escapePrompt = []byte{'$', ' '}
func main() {
config := &ssh.ClientConfig{
User: "dummy",
Auth: []ssh.AuthMethod{
ssh.Password("dummy"),
},
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
}
client, err := ssh.Dial("tcp", "127.0.0.1:22", config)
if err != nil {
panic(err)
}
defer client.Close()
session, err := client.NewSession()
if err != nil {
log.Fatalf("unable to create session: %s", err)
}
defer session.Close()
modes := ssh.TerminalModes{
ssh.ECHO: 0, // disable echoing
ssh.TTY_OP_ISPEED: 14400, // input speed = 14.4kbaud
ssh.TTY_OP_OSPEED: 14400, // output speed = 14.4kbaud
}
if err := session.RequestPty("xterm", 80, 40, modes); err != nil {
log.Fatal(err)
}
w, err := session.StdinPipe()
if err != nil {
panic(err)
}
r, err := session.StdoutPipe()
if err != nil {
panic(err)
}
if err := session.Start("/bin/sh"); err != nil {
log.Fatal(err)
}
readUntil(r, escapePrompt) //ignore the shell output
write(w, "ls -lhav")
out, err := readUntil(r, escapePrompt)
fmt.Printf("ls output: %s\n", *out)
write(w, "whoami")
out, err = readUntil(r, escapePrompt)
fmt.Printf("whoami: %s\n", *out)
write(w, "exit")
session.Wait()
}
func write(w io.WriteCloser, command string) error {
_, err := w.Write([]byte(command + "\n"))
return err
}
func readUntil(r io.Reader, matchingByte []byte) (*string, error) {
var buf [64 * 1024]byte
var t int
for {
n, err := r.Read(buf[t:])
if err != nil {
return nil, err
}
t += n
if isMatch(buf[:t], t, matchingByte) {
stringResult := string(buf[:t])
return &stringResult, nil
}
}
}
func isMatch(bytes []byte, t int, matchingBytes []byte) bool {
if t >= len(matchingBytes) {
for i := 0; i < len(matchingBytes); i++ {
if bytes[t - len(matchingBytes) + i] != matchingBytes[i] {
return false
}
}
return true
}
return false
}
get inspiration from this
i spent several days and that answer inspires me to try about using sdtin to run multiple commands, finally succeed. and i want to say i dont know golang at all , hence it may be redundant ,but the code works.
if _, err := w.Write([]byte("sys\r")); err != nil {
panic("Failed to run: " + err.Error())
}
if _, err := w.Write([]byte("wlan\r")); err != nil {
panic("Failed to run: " + err.Error())
}
if _, err := w.Write([]byte("ap-id 2099\r")); err != nil {
panic("Failed to run: " + err.Error())
}
if _, err := w.Write([]byte("ap-group xuebao-free\r")); err != nil {
panic("Failed to run: " + err.Error())
}
if _, err := w.Write([]byte("y\r")); err != nil {
panic("Failed to run: " + err.Error())
}
its function is the same asterminal operation
here is the whole code:
/* switch ssh
*/
package main
import (
"flag"
"fmt"
"io"
"log"
"net"
"os"
"strings"
"sync"
)
import (
"golang.org/x/crypto/ssh"
)
func main() {
//go run ./testConfig.go --username="aaa" --passwd='aaa' --ip_port="192.168.6.87" --cmd='display version'
username := flag.String("username", "aaa", "username")
passwd := flag.String("passwd", "aaa", "password")
ip_port := flag.String("ip_port", "1.1.1.1:22", "ip and port")
cmdstring := flag.String("cmd", "display arp statistics all", "cmdstring")
flag.Parse()
fmt.Println("username:", *username)
fmt.Println("passwd:", *passwd)
fmt.Println("ip_port:", *ip_port)
fmt.Println("cmdstring:", *cmdstring)
config := &ssh.ClientConfig{
User: *username,
Auth: []ssh.AuthMethod{
ssh.Password(*passwd),
},
Config: ssh.Config{
Ciphers: []string{"aes128-cbc", "aes128-ctr"},
},
HostKeyCallback: func(hostname string, remote net.Addr, key ssh.PublicKey) error {
return nil
},
}
// config.Config.Ciphers = append(config.Config.Ciphers, "aes128-cbc")
clinet, err := ssh.Dial("tcp", *ip_port, config)
checkError(err, "connet "+*ip_port)
session, err := clinet.NewSession()
defer session.Close()
checkError(err, "creae shell")
modes := ssh.TerminalModes{
ssh.ECHO: 1, // disable echoing
ssh.TTY_OP_ISPEED: 14400, // input speed = 14.4kbaud
ssh.TTY_OP_OSPEED: 14400, // output speed = 14.4kbaud
}
if err := session.RequestPty("vt100", 80, 40, modes); err != nil {
log.Fatal(err)
}
w, err := session.StdinPipe()
if err != nil {
panic(err)
}
r, err := session.StdoutPipe()
if err != nil {
panic(err)
}
e, err := session.StderrPipe()
if err != nil {
panic(err)
}
in, out := MuxShell(w, r, e)
if err := session.Shell(); err != nil {
log.Fatal(err)
}
<-out //ignore the shell output
in <- *cmdstring
fmt.Printf("%s\n", <-out)
if _, err := w.Write([]byte("sys\r")); err != nil {
panic("Failed to run: " + err.Error())
}
if _, err := w.Write([]byte("wlan\r")); err != nil {
panic("Failed to run: " + err.Error())
}
if _, err := w.Write([]byte("ap-id 2099\r")); err != nil {
panic("Failed to run: " + err.Error())
}
if _, err := w.Write([]byte("ap-group xuebao-free\r")); err != nil {
panic("Failed to run: " + err.Error())
}
if _, err := w.Write([]byte("y\r")); err != nil {
panic("Failed to run: " + err.Error())
}
in <- "quit"
_ = <-out
session.Wait()
}
func checkError(err error, info string) {
if err != nil {
fmt.Printf("%s. error: %s\n", info, err)
os.Exit(1)
}
}
func MuxShell(w io.Writer, r, e io.Reader) (chan<- string, <-chan string) {
in := make(chan string, 5)
out := make(chan string, 5)
var wg sync.WaitGroup
wg.Add(1) //for the shell itself
go func() {
for cmd := range in {
wg.Add(1)
w.Write([]byte(cmd + "\n"))
wg.Wait()
}
}()
go func() {
var (
buf [1024 * 1024]byte
t int
)
for {
n, err := r.Read(buf[t:])
if err != nil {
fmt.Println(err.Error())
close(in)
close(out)
return
}
t += n
result := string(buf[:t])
if strings.Contains(string(buf[t-n:t]), "More") {
w.Write([]byte("\n"))
}
if strings.Contains(result, "username:") ||
strings.Contains(result, "password:") ||
strings.Contains(result, ">") {
out <- string(buf[:t])
t = 0
wg.Done()
}
}
}()
return in, out
}
The following code works for me.
func main() {
key, err := ioutil.ReadFile("path to your key file")
if err != nil {
panic(err)
}
signer, err := ssh.ParsePrivateKey([]byte(key))
if err != nil {
panic(err)
}
config := &ssh.ClientConfig{
User: "ubuntu",
Auth: []ssh.AuthMethod{
ssh.PublicKeys(signer),
},
}
client, err := ssh.Dial("tcp", "52.91.35.179:22", config)
if err != nil {
panic(err)
}
session, err := client.NewSession()
if err != nil {
panic(err)
}
defer session.Close()
session.Stdout = os.Stdout
session.Stderr = os.Stderr
session.Stdin = os.Stdin
session.Shell()
session.Wait()
}

GO lang : Communicate with shell process

I want to execute a shell script from Go.
The shell script takes standard input and echoes the result.
I want to supply this input from GO and use the result.
What I am doing is:
cmd := exec.Command("python","add.py")
in, _ := cmd.StdinPipe()
But how do I read from in?
Here is some code writing to a process, and reading from it:
package main
import (
"bufio"
"fmt"
"os/exec"
)
func main() {
// What we want to calculate
calcs := make([]string, 2)
calcs[0] = "3*3"
calcs[1] = "6+6"
// To store the results
results := make([]string, 2)
cmd := exec.Command("/usr/bin/bc")
in, err := cmd.StdinPipe()
if err != nil {
panic(err)
}
defer in.Close()
out, err := cmd.StdoutPipe()
if err != nil {
panic(err)
}
defer out.Close()
// We want to read line by line
bufOut := bufio.NewReader(out)
// Start the process
if err = cmd.Start(); err != nil {
panic(err)
}
// Write the operations to the process
for _, calc := range calcs {
_, err := in.Write([]byte(calc + "\n"))
if err != nil {
panic(err)
}
}
// Read the results from the process
for i := 0; i < len(results); i++ {
result, _, err := bufOut.ReadLine()
if err != nil {
panic(err)
}
results[i] = string(result)
}
// See what was calculated
for _, result := range results {
fmt.Println(result)
}
}
You might want to read/write from/to the process in different goroutines.