Documentation
¶
Overview ¶
Package csv provides two main features:
1. CSV/TSV validation using struct tags
- Validates row/column values based on `validate:` rules.
- Returns detailed errors (row, column, rule violation).
- Supports multiple languages for error messages.
2. A pandas-like DataFrame API backed by SQL (filesql)
- Enables filtering, selecting, joining, mutating, sorting, casting, and cleaning CSV/TSV data.
- Operations are lazy and compiled into a single SQL query on execution.
- Useful for lightweight data manipulation without Python.
Validation ¶
Define rules using struct tags:
// User describes one decoded row; each validate tag lists the rules checked
// against the corresponding column value.
type User struct {
	ID    int    `validate:"numeric"`        // value must be made of numeric characters
	Name  string `validate:"alpha"`          // value must be made of alphabetic characters
	Score int    `validate:"gte=0,lte=100"` // value must lie between the two thresholds, 0 and 100
}
Decode() reads the CSV/TSV and applies validation before populating the struct slice.
DataFrame ¶
DataFrame offers a chainable API:
df := csv.NewDataFrame("data.csv").
	Select("name", "age").
	Filter("age >= 20").
	Mutate("decade", "age / 10")

// Rows materializes the lazily recorded operations, so its error must be
// checked rather than discarded.
rows, err := df.Rows()
if err != nil {
	panic(err)
}
All transformations are evaluated lazily and executed as SQL via filesql.
Scope ¶
The package currently supports CSV and TSV files. DataFrame is intended as a lightweight, pandas-inspired data manipulation layer for Go, motivated by combining CSV processing with the author's filesql engine.
For full examples and details, see the README.
Example (DataFrame_basic) ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main selects name and age, keeps rows with age >= 25, derives a decade
// column, sorts ascending by age and prints the first resulting row.
func main() {
	csvPath, remove := exampleCSV(`
id,name,age
1,Alice,23
2,Bob,30`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)
	frame = frame.Select("name", "age")
	frame = frame.Filter("age >= 25")
	frame = frame.Mutate("decade", "age / 10")
	frame = frame.Sort("age", true)

	records, err := frame.Rows()
	if err != nil {
		panic(err)
	}
	first := records[0]
	fmt.Println(first["name"], first["decade"])
}
Output: Bob 3
Example (DataFrame_cleaning) ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main chains FillNA, Cast, DropNA, Rename and Select on a small CSV and then
// prints the resulting shape and column labels.
func main() {
	csvPath, remove := exampleCSV(`
id,name,score
1,Alice,
2,Bob,80`)
	defer remove()

	renames := map[string]string{"score": "final_score"}
	frame := csv.NewDataFrame(csvPath)
	frame = frame.FillNA("score", 0)
	frame = frame.Cast("score", "INTEGER")
	frame = frame.DropNA("name")
	frame = frame.Rename(renames)
	frame = frame.Select("id", "name", "final_score")

	nRows, nCols := frame.Shape()
	fmt.Printf("%d rows, %d cols\n", nRows, nCols)
	fmt.Println(frame.Columns())
}
Output:
2 rows, 3 cols
[final_score id name]
Example (DataFrame_join) ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSVWithName stores data (whitespace-trimmed, with a single trailing
// newline) in a file called name inside a new temporary directory. It returns
// the file path and a function that deletes that directory. Any failure panics.
func exampleCSVWithName(name, data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, name)
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main left-joins users with purchases on id and prints the total found on
// the first joined row.
func main() {
	usersFile, removeUsers := exampleCSVWithName("users.csv", `
id,name
1,Alice
2,Bob`)
	defer removeUsers()

	purchasesFile, removePurchases := exampleCSVWithName("purchases.csv", `
id,total
1,100`)
	defer removePurchases()

	users := csv.NewDataFrame(usersFile)
	purchases := csv.NewDataFrame(purchasesFile)
	joined := users.LeftJoin(purchases, "id")

	records, err := joined.Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["total"])
}
Output: 100
Index ¶
- Variables
- type CSV
- type DataFrame
- func (df DataFrame) Cast(col string, dtype string) DataFrame
- func (df DataFrame) Columns() []string
- func (df DataFrame) DebugSQL() string
- func (df DataFrame) Drop(cols ...string) DataFrame
- func (df DataFrame) DropNA(cols ...string) DataFrame
- func (df DataFrame) Err() error
- func (df DataFrame) FillNA(col string, value any) DataFrame
- func (df DataFrame) Filter(expr string) DataFrame
- func (df DataFrame) FullJoin(other DataFrame, on string) DataFrame
- func (df DataFrame) Head(n int) ([]map[string]any, error)
- func (df DataFrame) Join(other DataFrame, on string) DataFrame
- func (df DataFrame) LeftJoin(other DataFrame, on string) DataFrame
- func (df DataFrame) Merge(other DataFrame, opts MergeOptions) DataFrame
- func (df DataFrame) Mutate(col string, expr string) DataFrame
- func (df DataFrame) Print(w io.Writer) error
- func (df DataFrame) Rename(mapping map[string]string) DataFrame
- func (df DataFrame) RightJoin(other DataFrame, on string) DataFrame
- func (df DataFrame) Rows() ([]map[string]any, error)
- func (df DataFrame) Select(cols ...string) DataFrame
- func (df DataFrame) Shape() (int, int)
- func (df DataFrame) Sort(col string, asc bool) DataFrame
- func (df DataFrame) Tail(n int) ([]map[string]any, error)
- func (df DataFrame) ToCSV(path string) (err error)
- func (df DataFrame) Warnings() []string
- type Error
- type MergeOptions
- type Option
Examples ¶
- Package (DataFrame_basic)
- Package (DataFrame_cleaning)
- Package (DataFrame_join)
- CSV
- DataFrame (JoinFilterSort)
- DataFrame.Columns
- DataFrame.DebugSQL
- DataFrame.Drop
- DataFrame.DropNA
- DataFrame.FillNA
- DataFrame.Filter
- DataFrame.Join
- DataFrame.Merge
- DataFrame.Mutate
- DataFrame.Print
- DataFrame.Rename
- DataFrame.Rows
- DataFrame.Select
- DataFrame.Shape
- DataFrame.Sort
- NewDataFrame
- WithJapaneseLanguage
- WithRussianLanguage
Constants ¶
This section is empty.
Variables ¶
var ( // ErrStructSlicePointerID is the error ID used when the value is not a pointer to a struct slice. ErrStructSlicePointerID = "ErrStructSlicePointer" // ErrInvalidOneOfFormatID is the error ID used when the target is not one of the specified values. ErrInvalidOneOfFormatID = "ErrInvalidOneOfFormat" // ErrInvalidThresholdFormatID is the error ID used when the threshold format is invalid. ErrInvalidThresholdFormatID = "ErrInvalidThresholdFormat" // ErrInvalidBooleanID is the error ID used when the target is not a boolean. ErrInvalidBooleanID = "ErrInvalidBoolean" // ErrInvalidAlphabetID is the error ID used when the target is not an alphabetic character. ErrInvalidAlphabetID = "ErrInvalidAlphabet" // ErrInvalidAlphaSpaceID is the error ID used when the target is not an alphabetic character or space. ErrInvalidAlphaSpaceID = "ErrInvalidAlphaSpace" // ErrInvalidAlphaUnicodeID is the error ID used when the target is not a unicode alphabetic character. ErrInvalidAlphaUnicodeID = "ErrInvalidAlphaUnicode" // ErrInvalidNumericID is the error ID used when the target is not a numeric character. ErrInvalidNumericID = "ErrInvalidNumeric" // ErrInvalidNumberID is the error ID used when the target is not a number. ErrInvalidNumberID = "ErrInvalidNumber" // ErrInvalidEqualFieldFormatID is the error ID used when the eqfield format is invalid. ErrInvalidEqualFieldFormatID = "ErrInvalidEqualFieldFormat" // ErrInvalidNeFieldFormatID is the error ID used when the nefield format is invalid. ErrInvalidNeFieldFormatID = "ErrInvalidNeFieldFormat" // ErrInvalidFieldContainsFormatID is the error ID used when the fieldcontains format is invalid. ErrInvalidFieldContainsFormatID = "ErrInvalidFieldContainsFormat" // ErrInvalidFieldExcludesFormatID is the error ID used when the fieldexcludes format is invalid. ErrInvalidFieldExcludesFormatID = "ErrInvalidFieldExcludesFormat" // ErrInvalidGteFieldFormatID is the error ID used when the gtefield format is invalid. 
ErrInvalidGteFieldFormatID = "ErrInvalidGteFieldFormat" // ErrInvalidGtFieldFormatID is the error ID used when the gtfield format is invalid. ErrInvalidGtFieldFormatID = "ErrInvalidGtFieldFormat" // ErrInvalidLteFieldFormatID is the error ID used when the ltefield format is invalid. ErrInvalidLteFieldFormatID = "ErrInvalidLteFieldFormat" // ErrInvalidLtFieldFormatID is the error ID used when the ltfield format is invalid. ErrInvalidLtFieldFormatID = "ErrInvalidLtFieldFormat" // ErrEqualFieldID is the error ID used when the target is not equal to the specified field. ErrEqualFieldID = "ErrEqualField" // ErrNeFieldID is the error ID used when the target is equal to the specified field. ErrNeFieldID = "ErrNeField" // ErrFieldContainsID is the error ID used when the target does not contain the specified field value. ErrFieldContainsID = "ErrFieldContains" // ErrFieldExcludesID is the error ID used when the target contains the specified excluded field value. ErrFieldExcludesID = "ErrFieldExcludes" // ErrGteFieldID is the error ID used when the target is not greater than or equal to the specified field. ErrGteFieldID = "ErrGteField" // ErrGtFieldID is the error ID used when the target is not greater than the specified field. ErrGtFieldID = "ErrGtField" // ErrLteFieldID is the error ID used when the target is not less than or equal to the specified field. ErrLteFieldID = "ErrLteField" // ErrLtFieldID is the error ID used when the target is not less than the specified field. ErrLtFieldID = "ErrLtField" // ErrInvalidAlphanumericID is the error ID used when the target is not an alphanumeric character. ErrInvalidAlphanumericID = "ErrInvalidAlphanumeric" // ErrInvalidAlphanumericUnicodeID is the error ID used when the target is not an alphanumeric unicode character. ErrInvalidAlphanumericUnicodeID = "ErrInvalidAlphanumericUnicode" // ErrInvalidContainsRuneID is the error ID used when the target does not contain the specified rune. 
ErrInvalidContainsRuneID = "ErrInvalidContainsRune" // ErrInvalidContainsRuneFormatID is the error ID used when the containsrune format is invalid. ErrInvalidContainsRuneFormatID = "ErrInvalidContainsRuneFormat" // ErrRequiredID is the error ID used when the target is required but is empty. ErrRequiredID = "ErrRequired" // ErrEqualID is the error ID used when the target is not equal to the threshold value. ErrEqualID = "ErrEqual" // ErrEqualIgnoreCaseID is the error ID used when the target is not equal to the specified value ignoring case. ErrEqualIgnoreCaseID = "ErrEqualIgnoreCase" // ErrInvalidThresholdID is the error ID used when the threshold value is invalid. ErrInvalidThresholdID = "ErrInvalidThreshold" // ErrNotEqualID is the error ID used when the target is equal to the threshold value. ErrNotEqualID = "ErrNotEqual" // ErrNotEqualIgnoreCaseID is the error ID used when the target is equal to the specified value ignoring case. ErrNotEqualIgnoreCaseID = "ErrNotEqualIgnoreCase" // ErrInvalidEqualIgnoreCaseFormatID is the error ID used when the eq_ignore_case format is invalid. ErrInvalidEqualIgnoreCaseFormatID = "ErrInvalidEqualIgnoreCaseFormat" // ErrInvalidNotEqualIgnoreCaseFormatID is the error ID used when the ne_ignore_case format is invalid. ErrInvalidNotEqualIgnoreCaseFormatID = "ErrInvalidNotEqualIgnoreCaseFormat" // ErrGreaterThanID is the error ID used when the target is not greater than the threshold value. ErrGreaterThanID = "ErrGreaterThan" // ErrGreaterThanEqualID is the error ID used when the target is not greater than or equal to the threshold value. ErrGreaterThanEqualID = "ErrGreaterThanEqual" // ErrLessThanID is the error ID used when the target is not less than the threshold value. ErrLessThanID = "ErrLessThan" // ErrLessThanEqualID is the error ID used when the target is not less than or equal to the threshold value. ErrLessThanEqualID = "ErrLessThanEqual" // ErrMinID is the error ID used when the target is less than the minimum value. 
ErrMinID = "ErrMin" // ErrMaxID is the error ID used when the target is greater than the maximum value. ErrMaxID = "ErrMax" // ErrLengthID is the error ID used when the target length is not equal to the threshold value. ErrLengthID = "ErrLength" // ErrOneOfID is the error ID used when the target is not one of the specified values. ErrOneOfID = "ErrOneOf" // ErrInvalidStructID is the error ID used when the target is not a struct. ErrInvalidStructID = "ErrInvalidStruct" // ErrUnsupportedTypeID is the error ID used when the target is an unsupported type. ErrUnsupportedTypeID = "ErrUnsupportedType" // ErrLowercaseID is the error ID used when the target is not a lowercase character. ErrLowercaseID = "ErrLowercase" // ErrUppercaseID is the error ID used when the target is not an uppercase character. ErrUppercaseID = "ErrUppercase" // ErrASCIIID is the error ID used when the target is not an ASCII character. ErrASCIIID = "ErrASCII" // ErrURIID is the error ID used when the target is not a URI. ErrURIID = "ErrURI" // ErrURLID is the error ID used when the target is not a URL. ErrURLID = "ErrURL" // ErrHTTPURLID is the error ID used when the target is not an HTTP or HTTPS URL. ErrHTTPURLID = "ErrHTTPURL" // ErrHTTPSURLID is the error ID used when the target is not an HTTPS URL. ErrHTTPSURLID = "ErrHTTPSURL" // ErrURLEncodedID is the error ID used when the target is not URL encoded. ErrURLEncodedID = "ErrURLEncoded" // ErrDataURIID is the error ID used when the target is not a valid data URI. ErrDataURIID = "ErrDataURI" // ErrHostnameID is the error ID used when the target is not a valid hostname (RFC 952). ErrHostnameID = "ErrHostname" // ErrHostnameRFC1123ID is the error ID used when the target is not a valid hostname (RFC 1123). ErrHostnameRFC1123ID = "ErrHostnameRFC1123" // ErrHostnamePortID is the error ID used when the target is not a valid hostname:port. 
ErrHostnamePortID = "ErrHostnamePort" // ErrFQDNID is the error ID used when the target is not a valid fully qualified domain name. ErrFQDNID = "ErrFQDN" // ErrIPAddrID is the error ID used when the target is not an IP address (ip_addr). ErrIPAddrID = "ErrIPAddr" // ErrIPv4ID is the error ID used when the target is not an IPv4 address. ErrIPv4ID = "ErrIPv4" // ErrIPv6ID is the error ID used when the target is not an IPv6 address. ErrIPv6ID = "ErrIPv6" // ErrUUIDID is the error ID used when the target is not a UUID. ErrUUIDID = "ErrUUID" // ErrEmailID is the error ID used when the target is not an email. ErrEmailID = "ErrEmail" // ErrStartsWithID is the error ID used when the target does not start with the specified value. ErrStartsWithID = "ErrStartsWith" // ErrStartsNotWithID is the error ID used when the target starts with the specified value (should not). ErrStartsNotWithID = "ErrStartsNotWith" // ErrInvalidStartsWithFormatID is the error ID used when the startswith format is invalid. ErrInvalidStartsWithFormatID = "ErrInvalidStartsWithFormat" // ErrInvalidStartsNotWithFormatID is the error ID used when the startsnotwith format is invalid. ErrInvalidStartsNotWithFormatID = "ErrInvalidStartsNotWithFormat" // ErrEndsWithID is the error ID used when the target does not end with the specified value. ErrEndsWithID = "ErrEndsWith" // ErrInvalidEndsWithFormatID is the error ID used when the endswith format is invalid. ErrInvalidEndsWithFormatID = "ErrInvalidEndsWithFormat" // ErrEndsNotWithID is the error ID used when the target ends with the specified value (should not). ErrEndsNotWithID = "ErrEndsNotWith" // ErrInvalidEndsNotWithFormatID is the error ID used when the endsnotwith format is invalid. ErrInvalidEndsNotWithFormatID = "ErrInvalidEndsNotWithFormat" // ErrExcludesID is the error ID used when the target contains the specified excluded value. ErrExcludesID = "ErrExcludes" // ErrInvalidExcludesFormatID is the error ID used when the excludes format is invalid. 
ErrInvalidExcludesFormatID = "ErrInvalidExcludesFormat" // ErrExcludesAllID is the error ID used when the target contains any of the specified excluded runes. ErrExcludesAllID = "ErrExcludesAll" // ErrInvalidExcludesAllFormatID is the error ID used when the excludesall format is invalid. ErrInvalidExcludesAllFormatID = "ErrInvalidExcludesAllFormat" // ErrExcludesRuneID is the error ID used when the target contains the specified rune. ErrExcludesRuneID = "ErrExcludesRune" // ErrInvalidExcludesRuneFormatID is the error ID used when the excludesrune format is invalid. ErrInvalidExcludesRuneFormatID = "ErrInvalidExcludesRuneFormat" // ErrMultibyteID is the error ID used when the target does not contain multibyte characters. ErrMultibyteID = "ErrMultibyte" // ErrPrintASCIIID is the error ID used when the target contains non printable ASCII characters. ErrPrintASCIIID = "ErrPrintASCII" // ErrCIDRID is the error ID used when the target is not a valid CIDR. ErrCIDRID = "ErrCIDR" // ErrCIDRv4ID is the error ID used when the target is not a valid IPv4 CIDR. ErrCIDRv4ID = "ErrCIDRv4" // ErrCIDRv6ID is the error ID used when the target is not a valid IPv6 CIDR. ErrCIDRv6ID = "ErrCIDRv6" // ErrContainsID is the error ID used when the target does not contain the specified value. ErrContainsID = "ErrContains" // ErrInvalidContainsFormatID is the error ID used when the contains format is invalid. ErrInvalidContainsFormatID = "ErrInvalidContainsFormat" // ErrContainsAnyID is the error ID used when the target does not contain any of the specified values. ErrContainsAnyID = "ErrContainsAny" // ErrInvalidContainsAnyFormatID is the error ID used when the contains any format is invalid. ErrInvalidContainsAnyFormatID = "ErrInvalidContainsAnyFormat" )
var LocaleFS embed.FS
LocaleFS embeds translation files located in the i18n directory.
Functions ¶
This section is empty.
Types ¶
type CSV ¶
type CSV struct {
// contains filtered or unexported fields
}
CSV is a struct that implements CSV Reader and Writer.
Example ¶
package main
import (
"bytes"
"fmt"
"github.com/nao1215/csv"
)
// main decodes an in-memory CSV into account values and prints every
// validation error reported by Decode, one per line.
func main() {
	// account carries the validation rules for each CSV column.
	type account struct {
		ID              int    `validate:"number,gte=1"`
		Name            string `validate:"alpha"`
		Age             int    `validate:"number,gte=18,lte=65"`
		Password        string `validate:"required,gte=8"`
		PasswordConfirm string `validate:"eqfield=Password"`
		Role            string `validate:"oneof=admin user"`
		Note            string `validate:"excludes=TODO,startswith=Note"`
		Nickname        string `validate:"alphanumunicode"`
		IP              string `validate:"ip4_addr"`
		CIDR            string `validate:"cidrv4"`
		URL             string `validate:"https_url"`
	}

	const input = `id,name,age,password,password_confirm,role,note,nickname,ip,cidr,url
1,Alice,17,Secret123,Secret12,superuser,"TODO: fix",alice!,999.0.0.1,10.0.0.0/33,http://example.com
-5,Bob,30,short,short,admin,"Note: ready",Bob123,192.168.0.1,192.168.0.0/24,https://example.com
`

	reader, err := csv.NewCSV(bytes.NewBufferString(input))
	if err != nil {
		panic(err)
	}

	accounts := []account{}
	// Ranging over an empty result is a no-op, so no length guard is needed.
	for _, decodeErr := range reader.Decode(&accounts) {
		fmt.Println(decodeErr.Error())
	}
}
Output:
line:2 column age: target is not greater than or equal to the threshold value: threshold=18, value=17
line:2 column password: target is not greater than or equal to the threshold value: value=Secret123
line:2 column role: target is not one of the values: oneof=admin user, value=superuser
line:2 column note: target contains a prohibited substring: excludes=TODO, value=TODO: fix
line:2 column note: target does not start with the specified value: startswith=Note, value=TODO: fix
line:2 column nickname: target is not an alphanumeric unicode character: value=alice!
line:2 column ip: target is not a valid IPv4 address: value=999.0.0.1
line:2 column cidr: target is not a valid IPv4 CIDR: value=10.0.0.0/33
line:2 column url: target is not a valid HTTPS URL: value=http://example.com
line:2 column password_confirm: field is not equal to the specified field: field=PasswordConfirm, other=Password
line:3 column id: target is not greater than or equal to the threshold value: threshold=1, value=-5
line:3 column password: target is not greater than or equal to the threshold value: value=short
type DataFrame ¶ added in v0.3.0
type DataFrame struct {
// contains filtered or unexported fields
}
DataFrame emulates a subset of pandas.DataFrame backed by lazy SQL execution. Operations are accumulated and compiled into a single SQL query at materialization time.
Example (JoinFilterSort) ¶
package main
import (
"bytes"
"fmt"
"path/filepath"
"github.com/nao1215/csv"
)
// main joins users, orders and departments from testdata on id, filters and
// sorts the result, and prints the selected columns via Print.
func main() {
	// fixture resolves a file name inside the testdata directory.
	fixture := func(name string) string {
		return filepath.Join("testdata", name)
	}

	people := csv.NewDataFrame(fixture("sample.csv")).
		Select("id", "name", "age").
		Mutate("age_bucket", "CASE WHEN age >= 30 THEN '30s' ELSE '20s' END")

	bigOrders := csv.NewDataFrame(fixture("orders.csv")).
		Filter("total >= 100").
		Mutate("gross_total", "total + 5")

	departments := csv.NewDataFrame(fixture("departments.csv")).
		Select("id", "dept").
		Rename(map[string]string{"dept": "dept_name"})

	report := people.Join(bigOrders, "id")
	report = report.Join(departments, "id")
	report = report.Filter("age >= 23")
	report = report.Sort("gross_total", false)
	report = report.Select("name", "dept_name", "gross_total", "age_bucket")

	out := new(bytes.Buffer)
	if err := report.Print(out); err != nil {
		panic(err)
	}
	fmt.Print(out.String())
}
Output:
age_bucket dept_name gross_total name
30s Engineering 155 Denis
20s Sales 105 Gina
func NewDataFrame ¶ added in v0.3.0
NewDataFrame behaves similarly to pandas.read_csv, returning a DataFrame backed by the file. The DataFrame records operations lazily until materialization (Rows/Head/etc.).
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main opens a one-row CSV as a DataFrame and prints the row count together
// with the name found on the first row.
func main() {
	csvPath, remove := exampleCSV(`
id,name
1,Alice`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)
	records, err := frame.Rows()
	if err != nil {
		panic(err)
	}

	count, name := len(records), records[0]["name"]
	fmt.Println(count, name)
}
Output: 1 Alice
func (DataFrame) Cast ¶ added in v0.3.0
Cast casts a column akin to pandas.Series.astype without eagerly validating column existence. Missing columns are skipped with warnings.
func (DataFrame) Columns ¶ added in v0.3.0
Columns returns the ordered column labels, mimicking pandas.DataFrame.columns.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main prints the column labels of a CSV opened as a DataFrame.
func main() {
	csvPath, remove := exampleCSV(`
id,name
1,Alice`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)
	labels := frame.Columns()
	fmt.Println(labels)
}
Output: [id name]
func (DataFrame) DebugSQL ¶ added in v0.3.0
DebugSQL returns the lazily constructed SQL statement for inspection. It does not execute the statement and returns an empty string when planning previously failed.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main prints the SQL statement that DebugSQL reports for a Select("name")
// frame, without executing it.
func main() {
	csvPath, remove := exampleCSV(`
id,name
1,Alice`)
	defer remove()

	selected := csv.NewDataFrame(csvPath).Select("name")
	query := selected.DebugSQL()
	fmt.Println(query)
}
Output: SELECT data.* FROM data
func (DataFrame) Drop ¶ added in v0.3.0
Drop removes columns similar to pandas.DataFrame.drop, but missing columns emit warnings instead of failing.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main drops the age column and prints the name held by the first row.
func main() {
	csvPath, remove := exampleCSV(`
id,name,age
1,Alice,23`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)
	withoutAge := frame.Drop("age")

	records, err := withoutAge.Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["name"])
}
Output: Alice
func (DataFrame) DropNA ¶ added in v0.3.0
DropNA behaves like pandas.DataFrame.dropna(subset=cols) with AND semantics across provided columns.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main maps empty value cells to NULL through a derived column, drops the
// rows where that column is NULL, and prints how many rows remain.
func main() {
	csvPath, remove := exampleCSV(`
id,value
1,
2,5`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)
	frame = frame.Mutate("clean_value", "NULLIF(value, '')")
	frame = frame.DropNA("clean_value")
	frame = frame.Select("id")

	records, err := frame.Rows()
	if err != nil {
		panic(err)
	}
	remaining := len(records)
	fmt.Println(remaining)
}
Output: 1
func (DataFrame) Err ¶ added in v0.3.0
Err surfaces deferred planning errors (for example, invalid Merge/Join options).
func (DataFrame) FillNA ¶ added in v0.3.0
FillNA mirrors pandas.Series.fillna for a column but records warnings if the column is absent.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main applies FillNA to the value column and prints whether the planned SQL
// contains the expected IFNULL expression.
func main() {
	csvPath, remove := exampleCSV(`
id,value
1,
2,5`)
	defer remove()

	filled := csv.NewDataFrame(csvPath).FillNA("value", 0)
	query := filled.DebugSQL()
	hasIfNull := strings.Contains(query, "IFNULL(value, 0)")
	fmt.Println(hasIfNull)
}
Output: true
func (DataFrame) Filter ¶ added in v0.3.0
Filter acts like pandas.DataFrame.query, returning a new lazy DataFrame with an added WHERE clause. Expressions are passed directly through to SQLite: never concatenate untrusted user input here without validation. Invalid columns are reported through Err/Warnings when executed.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main keeps the rows with age >= 15 and prints the age of the first match.
func main() {
	csvPath, remove := exampleCSV(`
id,age
1,10
2,20`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)
	matching := frame.Filter("age >= 15")

	records, err := matching.Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["age"])
}
Output: 20
func (DataFrame) FullJoin ¶ added in v0.3.0
FullJoin performs a FULL OUTER JOIN similar to pandas.merge(..., how="outer"). Support depends on the SQL engine; SQLite backends typically do not support FULL OUTER JOIN. TODO: emulate FULL OUTER JOIN (LEFT/RIGHT UNION) for SQLite so callers see pandas-like results.
func (DataFrame) Head ¶ added in v0.3.0
Head returns the first n rows, matching pandas.DataFrame.head.
func (DataFrame) Join ¶ added in v0.3.0
Join performs an INNER JOIN similar to pandas.merge(how="inner") using a single key.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSVWithName stores data (whitespace-trimmed, with a single trailing
// newline) in a file called name inside a new temporary directory. It returns
// the file path and a function that deletes that directory. Any failure panics.
func exampleCSVWithName(name, data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, name)
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main inner-joins users with scores on id, keeps name and score, and prints
// the score found on the first row.
func main() {
	usersFile, removeUsers := exampleCSVWithName("users.csv", `
id,name
1,Alice`)
	defer removeUsers()

	scoresFile, removeScores := exampleCSVWithName("scores.csv", `
id,score
1,80`)
	defer removeScores()

	users := csv.NewDataFrame(usersFile)
	scores := csv.NewDataFrame(scoresFile)
	joined := users.Join(scores, "id")
	joined = joined.Select("name", "score")

	records, err := joined.Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["score"])
}
Output: 80
func (DataFrame) LeftJoin ¶ added in v0.3.0
LeftJoin performs a LEFT JOIN similar to pandas.merge(..., how="left").
func (DataFrame) Merge ¶ added in v0.3.0
func (df DataFrame) Merge(other DataFrame, opts MergeOptions) DataFrame
Merge merges two DataFrames similarly to pandas.merge while validating join keys eagerly. Missing join keys record an error retrievable via Err().
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSVWithName stores data (whitespace-trimmed, with a single trailing
// newline) in a file called name inside a new temporary directory. It returns
// the file path and a function that deletes that directory. Any failure panics.
func exampleCSVWithName(name, data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, name)
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main merges users with scores using MergeOptions{OnKey: "id"} and prints
// the score found on the first merged row.
func main() {
	usersFile, removeUsers := exampleCSVWithName("users.csv", `
id,name
1,Alice`)
	defer removeUsers()

	scoresFile, removeScores := exampleCSVWithName("scores.csv", `
id,score
1,80`)
	defer removeScores()

	users := csv.NewDataFrame(usersFile)
	scores := csv.NewDataFrame(scoresFile)
	merged := users.Merge(scores, csv.MergeOptions{OnKey: "id"})

	records, err := merged.Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["score"])
}
Output: 80
func (DataFrame) Mutate ¶ added in v0.3.0
Mutate behaves like pandas.DataFrame.assign, creating derived columns backed by SQL expressions. Expressions reference the current column names and are evaluated lazily.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir removes dir and everything below it, panicking if removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV stores data (whitespace-trimmed, with a single trailing newline)
// as data.csv inside a new temporary directory. It returns the file path and
// a function that deletes that directory. Any failure panics.
func exampleCSV(data string) (string, func()) {
	tmp, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	contents := []byte(strings.TrimSpace(data) + "\n")
	file := filepath.Join(tmp, "data.csv")
	err = os.WriteFile(file, contents, 0o600)
	if err != nil {
		cleanupDir(tmp)
		panic(err)
	}

	return file, func() { cleanupDir(tmp) }
}
// main derives double_value from the SQL expression "value * 2" and prints it
// for the first row.
func main() {
	csvPath, remove := exampleCSV(`
id,value
1,10`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)
	doubled := frame.Mutate("double_value", "value * 2")

	records, err := doubled.Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["double_value"])
}
Output: 20
func (DataFrame) Print ¶ added in v0.3.0
Print renders the DataFrame similar to pandas.DataFrame.to_string.
Example ¶
package main
import (
"bytes"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir deletes dir and everything beneath it, panicking if the
// removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV writes data (surrounding whitespace trimmed, one trailing newline
// appended) to data.csv inside a new temporary directory. It returns the file
// path and a function that removes the directory, and panics on any I/O error.
func exampleCSV(data string) (string, func()) {
	tmpDir, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	csvPath := filepath.Join(tmpDir, "data.csv")
	content := []byte(strings.TrimSpace(data) + "\n")
	if writeErr := os.WriteFile(csvPath, content, 0o600); writeErr != nil {
		// Do not leave the freshly created directory behind.
		cleanupDir(tmpDir)
		panic(writeErr)
	}

	return csvPath, func() { cleanupDir(tmpDir) }
}
// main renders a one-row DataFrame into an in-memory buffer with Print and
// then writes the buffered text to standard output.
func main() {
	csvPath, remove := exampleCSV(`
id,name
1,Alice`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)

	var rendered bytes.Buffer
	err := frame.Print(&rendered)
	if err != nil {
		panic(err)
	}
	fmt.Print(rendered.String())
}
Output: id name 1 Alice
func (DataFrame) Rename ¶ added in v0.3.0
Rename performs column renaming like pandas.DataFrame.rename in a lazy fashion. Missing columns are ignored, and a warning is recorded instead of raising an error.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir deletes dir and everything beneath it, panicking if the
// removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV writes data (surrounding whitespace trimmed, one trailing newline
// appended) to data.csv inside a new temporary directory. It returns the file
// path and a function that removes the directory, and panics on any I/O error.
func exampleCSV(data string) (string, func()) {
	tmpDir, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	csvPath := filepath.Join(tmpDir, "data.csv")
	content := []byte(strings.TrimSpace(data) + "\n")
	if writeErr := os.WriteFile(csvPath, content, 0o600); writeErr != nil {
		// Do not leave the freshly created directory behind.
		cleanupDir(tmpDir)
		panic(writeErr)
	}

	return csvPath, func() { cleanupDir(tmpDir) }
}
// main renames the "name" column to "full_name" and prints the renamed
// column's value for the first row.
func main() {
	csvPath, remove := exampleCSV(`
id,name
1,Alice`)
	defer remove()

	source := csv.NewDataFrame(csvPath)
	mapping := map[string]string{"name": "full_name"}

	records, err := source.Rename(mapping).Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["full_name"])
}
Output: Alice
func (DataFrame) RightJoin ¶ added in v0.3.0
RightJoin performs a RIGHT JOIN similar to pandas.merge(..., how="right"). Support depends on the underlying SQL engine configured via filesql; SQLite backends may not support RIGHT JOIN. TODO: emulate RIGHT JOIN for SQLite (e.g., via UNION) so behavior matches pandas even on limited engines.
func (DataFrame) Rows ¶ added in v0.3.0
Rows materializes the DataFrame similar to pandas.DataFrame.to_dict("records").
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir deletes dir and everything beneath it, panicking if the
// removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV writes data (surrounding whitespace trimmed, one trailing newline
// appended) to data.csv inside a new temporary directory. It returns the file
// path and a function that removes the directory, and panics on any I/O error.
func exampleCSV(data string) (string, func()) {
	tmpDir, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	csvPath := filepath.Join(tmpDir, "data.csv")
	content := []byte(strings.TrimSpace(data) + "\n")
	if writeErr := os.WriteFile(csvPath, content, 0o600); writeErr != nil {
		// Do not leave the freshly created directory behind.
		cleanupDir(tmpDir)
		panic(writeErr)
	}

	return csvPath, func() { cleanupDir(tmpDir) }
}
// main materializes a one-row CSV through Rows and prints how many records
// were returned.
func main() {
	csvPath, remove := exampleCSV(`
id,name
1,Alice`)
	defer remove()

	records, err := csv.NewDataFrame(csvPath).Rows()
	if err != nil {
		panic(err)
	}

	count := len(records)
	fmt.Println(count)
}
Output: 1
func (DataFrame) Select ¶ added in v0.3.0
Select mirrors pandas column projection on a lazy SQL query. Nonexistent columns are skipped while emitting warnings retrievable via Warnings().
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir deletes dir and everything beneath it, panicking if the
// removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV writes data (surrounding whitespace trimmed, one trailing newline
// appended) to data.csv inside a new temporary directory. It returns the file
// path and a function that removes the directory, and panics on any I/O error.
func exampleCSV(data string) (string, func()) {
	tmpDir, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	csvPath := filepath.Join(tmpDir, "data.csv")
	content := []byte(strings.TrimSpace(data) + "\n")
	if writeErr := os.WriteFile(csvPath, content, 0o600); writeErr != nil {
		// Do not leave the freshly created directory behind.
		cleanupDir(tmpDir)
		panic(writeErr)
	}

	return csvPath, func() { cleanupDir(tmpDir) }
}
// main projects the "name" column out of a three-column CSV and prints its
// value for the first row.
func main() {
	csvPath, remove := exampleCSV(`
id,name,age
1,Alice,23`)
	defer remove()

	source := csv.NewDataFrame(csvPath)
	records, err := source.Select("name").Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["name"])
}
Output: Alice
func (DataFrame) Shape ¶ added in v0.3.0
Shape returns (rows, cols) like pandas.DataFrame.shape, materializing the DataFrame if needed.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir deletes dir and everything beneath it, panicking if the
// removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV writes data (surrounding whitespace trimmed, one trailing newline
// appended) to data.csv inside a new temporary directory. It returns the file
// path and a function that removes the directory, and panics on any I/O error.
func exampleCSV(data string) (string, func()) {
	tmpDir, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	csvPath := filepath.Join(tmpDir, "data.csv")
	content := []byte(strings.TrimSpace(data) + "\n")
	if writeErr := os.WriteFile(csvPath, content, 0o600); writeErr != nil {
		// Do not leave the freshly created directory behind.
		cleanupDir(tmpDir)
		panic(writeErr)
	}

	return csvPath, func() { cleanupDir(tmpDir) }
}
// main prints the two values returned by Shape (rows, then columns) for a
// CSV holding one data row and two columns.
func main() {
	csvPath, remove := exampleCSV(`
id,name
1,Alice`)
	defer remove()

	frame := csv.NewDataFrame(csvPath)
	rowCount, colCount := frame.Shape()
	fmt.Println(rowCount, colCount)
}
Output: 1 2
func (DataFrame) Sort ¶ added in v0.3.0
Sort matches pandas.DataFrame.sort_values for a single column, ordering results at execution time.
Example ¶
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/nao1215/csv"
)
// cleanupDir deletes dir and everything beneath it, panicking if the
// removal fails.
func cleanupDir(dir string) {
	err := os.RemoveAll(dir)
	if err == nil {
		return
	}
	panic(err)
}
// exampleCSV writes data (surrounding whitespace trimmed, one trailing newline
// appended) to data.csv inside a new temporary directory. It returns the file
// path and a function that removes the directory, and panics on any I/O error.
func exampleCSV(data string) (string, func()) {
	tmpDir, err := os.MkdirTemp("", "dfexample")
	if err != nil {
		panic(err)
	}

	csvPath := filepath.Join(tmpDir, "data.csv")
	content := []byte(strings.TrimSpace(data) + "\n")
	if writeErr := os.WriteFile(csvPath, content, 0o600); writeErr != nil {
		// Do not leave the freshly created directory behind.
		cleanupDir(tmpDir)
		panic(writeErr)
	}

	return csvPath, func() { cleanupDir(tmpDir) }
}
// main sorts a two-row CSV by its "value" column, passing true as Sort's
// second argument, and prints the "value" of the first row returned.
func main() {
	csvPath, remove := exampleCSV(`
id,value
1,10
2,5`)
	defer remove()

	source := csv.NewDataFrame(csvPath)
	records, err := source.Sort("value", true).Rows()
	if err != nil {
		panic(err)
	}
	fmt.Println(records[0]["value"])
}
Output: 5
func (DataFrame) Tail ¶ added in v0.3.0
Tail mirrors pandas.DataFrame.tail by fetching all rows before slicing.
type Error ¶ added in v0.1.0
// Error is the package's error type used to localize error messages.
// Its fields are unexported, so none are shown in this listing.
type Error struct {
// contains filtered or unexported fields
}
Error is an error that is used to localize error messages.
type MergeOptions ¶ added in v0.3.0
// MergeOptions configures Merge. Set either On or OnKey to name the join
// key(s); How and Suffixes are optional.
type MergeOptions struct {
// On lists join key columns; it is the alternative to OnKey.
// NOTE(review): presumably intended for multi-column joins — confirm.
On []string
// OnKey names a single join key column (for example "id"); it is the
// alternative to On.
OnKey string
// How accepts "", "inner", "left", "right", "outer", or "full".
How string
// Suffixes is optional.
// NOTE(review): presumably the suffixes applied to overlapping left/right
// column names, as in pandas.merge — confirm.
Suffixes [2]string
}
MergeOptions configures Merge; set either On or OnKey and optionally How/Suffixes.
type Option ¶
Option is a function that sets a configuration option for the CSV struct.
func WithHeaderless ¶
func WithHeaderless() Option
WithHeaderless is an Option that sets the headerless flag to true.
func WithJapaneseLanguage ¶ added in v0.1.0
func WithJapaneseLanguage() Option
WithJapaneseLanguage is an Option that sets the i18n bundle to Japanese.
Example ¶
package main
import (
"bytes"
"fmt"
"github.com/nao1215/csv"
)
// main decodes three CSV rows into person structs using Japanese validation
// messages and prints every validation error that Decode reports.
func main() {
	const input = `id,name,age
1,Gina,23
a,Yulia,25
3,Den1s,30
`
	reader, err := csv.NewCSV(bytes.NewBufferString(input), csv.WithJapaneseLanguage())
	if err != nil {
		panic(err)
	}

	type person struct {
		ID   int    `validate:"numeric"`
		Name string `validate:"alpha"`
		Age  int    `validate:"gt=24"`
	}

	people := []person{}
	// Ranging over an empty result prints nothing, so no length guard is needed.
	for _, decodeErr := range reader.Decode(&people) {
		fmt.Println(decodeErr.Error())
	}
}
Output:
line:2 column age: 値がしきい値より大きくありません: threshold=24, value=23
line:3 column id: 値が数字ではありません: value=a
line:4 column name: 値がアルファベット文字ではありません: value=Den1s
func WithRussianLanguage ¶ added in v0.1.0
func WithRussianLanguage() Option
WithRussianLanguage is an Option that sets the i18n bundle to Russian.
Example ¶
package main
import (
"bytes"
"fmt"
"github.com/nao1215/csv"
)
// main decodes three CSV rows into person structs using Russian validation
// messages and prints every validation error that Decode reports.
func main() {
	const input = `id,name,age
1,Gina,23
a,Yulia,25
3,Den1s,30
`
	reader, err := csv.NewCSV(bytes.NewBufferString(input), csv.WithRussianLanguage())
	if err != nil {
		panic(err)
	}

	type person struct {
		ID   int    `validate:"numeric"`
		Name string `validate:"alpha"`
		Age  int    `validate:"gt=24"`
	}

	people := []person{}
	// Ranging over an empty result prints nothing, so no length guard is needed.
	for _, decodeErr := range reader.Decode(&people) {
		fmt.Println(decodeErr.Error())
	}
}
Output:
line:2 column age: целевое значение не больше порогового значения: threshold=24, value=23
line:3 column id: целевое значение не является числовым символом: value=a
line:4 column name: целевое значение не является алфавитным символом: value=Den1s
func WithTabDelimiter ¶
func WithTabDelimiter() Option
WithTabDelimiter is an Option that sets the delimiter to a tab character.
