Merge pull request #110 from digineo/time-parsing

Fix time parsing and reduce syscalls
This commit is contained in:
Julien Laffaye 2018-01-05 08:56:08 +01:00 committed by GitHub
commit 83891dbe00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 128 additions and 50 deletions

9
ftp.go
View File

@ -348,14 +348,14 @@ func (c *ServerConn) NameList(path string) (entries []string, err error) {
// List issues a LIST FTP command.
func (c *ServerConn) List(path string) (entries []*Entry, err error) {
var cmd string
var parseFunc func(string) (*Entry, error)
var parser parseFunc
if c.mlstSupported {
cmd = "MLSD"
parseFunc = parseRFC3659ListLine
parser = parseRFC3659ListLine
} else {
cmd = "LIST"
parseFunc = parseListLine
parser = parseListLine
}
conn, err := c.cmdDataConnFrom(0, "%s %s", cmd, path)
@ -367,8 +367,9 @@ func (c *ServerConn) List(path string) (entries []*Entry, err error) {
defer r.Close()
scanner := bufio.NewScanner(r)
now := time.Now()
for scanner.Scan() {
entry, err := parseFunc(scanner.Text())
entry, err := parser(scanner.Text(), now)
if err == nil {
entries = append(entries, entry)
}

View File

@ -2,6 +2,7 @@ package ftp
import (
"errors"
"fmt"
"strconv"
"strings"
"time"
@ -9,7 +10,9 @@ import (
var errUnsupportedListLine = errors.New("Unsupported LIST line")
var listLineParsers = []func(line string) (*Entry, error){
type parseFunc func(string, time.Time) (*Entry, error)
var listLineParsers = []parseFunc{
parseRFC3659ListLine,
parseLsListLine,
parseDirListLine,
@ -22,7 +25,7 @@ var dirTimeFormats = []string{
}
// parseRFC3659ListLine parses the style of directory line defined in RFC 3659.
func parseRFC3659ListLine(line string) (*Entry, error) {
func parseRFC3659ListLine(line string, now time.Time) (*Entry, error) {
iSemicolon := strings.Index(line, ";")
iWhitespace := strings.Index(line, " ")
@ -66,7 +69,7 @@ func parseRFC3659ListLine(line string) (*Entry, error) {
// parseLsListLine parses a directory line in a format based on the output of
// the UNIX ls command.
func parseLsListLine(line string) (*Entry, error) {
func parseLsListLine(line string, now time.Time) (*Entry, error) {
// Has the first field a length of 10 bytes?
if strings.IndexByte(line, ' ') != 10 {
@ -85,7 +88,7 @@ func parseLsListLine(line string) (*Entry, error) {
Type: EntryTypeFolder,
Name: scanner.Remaining(),
}
if err := e.setTime(fields[3:6]); err != nil {
if err := e.setTime(fields[3:6], now); err != nil {
return nil, err
}
@ -102,7 +105,7 @@ func parseLsListLine(line string) (*Entry, error) {
if err := e.setSize(fields[2]); err != nil {
return nil, errUnsupportedListLine
}
if err := e.setTime(fields[4:7]); err != nil {
if err := e.setTime(fields[4:7], now); err != nil {
return nil, err
}
@ -132,7 +135,7 @@ func parseLsListLine(line string) (*Entry, error) {
return nil, errors.New("Unknown entry type")
}
if err := e.setTime(fields[5:8]); err != nil {
if err := e.setTime(fields[5:8], now); err != nil {
return nil, err
}
@ -141,7 +144,7 @@ func parseLsListLine(line string) (*Entry, error) {
// parseDirListLine parses a directory line in a format based on the output of
// the MS-DOS DIR command.
func parseDirListLine(line string) (*Entry, error) {
func parseDirListLine(line string, now time.Time) (*Entry, error) {
e := &Entry{}
var err error
@ -185,7 +188,7 @@ func parseDirListLine(line string) (*Entry, error) {
// by hostedftp.com
// -r-------- 0 user group 65222236 Feb 24 00:39 UABlacklistingWeek8.csv
// (The link count is inexplicably 0)
func parseHostedFTPLine(line string) (*Entry, error) {
func parseHostedFTPLine(line string, now time.Time) (*Entry, error) {
// Has the first field a length of 10 bytes?
if strings.IndexByte(line, ' ') != 10 {
return nil, errUnsupportedListLine
@ -199,14 +202,14 @@ func parseHostedFTPLine(line string) (*Entry, error) {
}
// Set link count to 1 and attempt to parse as Unix.
return parseLsListLine(fields[0] + " 1 " + scanner.Remaining())
return parseLsListLine(fields[0]+" 1 "+scanner.Remaining(), now)
}
// parseListLine parses the various non-standard format returned by the LIST
// FTP command.
func parseListLine(line string) (*Entry, error) {
func parseListLine(line string, now time.Time) (*Entry, error) {
for _, f := range listLineParsers {
e, err := f(line)
e, err := f(line, now)
if err != errUnsupportedListLine {
return e, err
}
@ -219,17 +222,34 @@ func (e *Entry) setSize(str string) (err error) {
return
}
func (e *Entry) setTime(fields []string) (err error) {
var timeStr string
if strings.Contains(fields[2], ":") { // this year
thisYear, _, _ := time.Now().Date()
timeStr = fields[1] + " " + fields[0] + " " + strconv.Itoa(thisYear)[2:4] + " " + fields[2] + " GMT"
} else { // not this year
func (e *Entry) setTime(fields []string, now time.Time) (err error) {
if strings.Contains(fields[2], ":") { // contains time
thisYear, _, _ := now.Date()
timeStr := fmt.Sprintf("%s %s %d %s GMT", fields[1], fields[0], thisYear, fields[2])
e.Time, err = time.Parse("_2 Jan 2006 15:04 MST", timeStr)
/*
On unix, `info ls` shows:
10.1.6 Formatting file timestamps
---------------------------------
A timestamp is considered to be recent if it is less than six
months old, and is not dated in the future. If a timestamp dated today
is not listed in recent form, the timestamp is in the future, which
means you probably have clock skew problems which may break programs
like make that rely on file timestamps.
*/
if !e.Time.Before(now.AddDate(0, 6, 0)) {
e.Time = e.Time.AddDate(-1, 0, 0)
}
} else { // only the date
if len(fields[2]) != 4 {
return errors.New("Invalid year format in time string")
}
timeStr = fields[1] + " " + fields[0] + " " + fields[2][2:4] + " 00:00 GMT"
timeStr := fmt.Sprintf("%s %s %s 00:00 GMT", fields[1], fields[0], fields[2])
e.Time, err = time.Parse("_2 Jan 2006 15:04 MST", timeStr)
}
e.Time, err = time.Parse("_2 Jan 06 15:04 MST", timeStr)
return
}

View File

@ -1,11 +1,18 @@
package ftp
import (
"strings"
"testing"
"time"
)
var thisYear, _, _ = time.Now().Date()
// Shared time fixtures. The parsers take the reference time as an argument,
// so the tests pin it to a fixed instant instead of reading the wall clock;
// expectations for year-less timestamps then do not depend on when the
// tests are run.
var (
// now is the current time for all tests
now = newTime(2017, time.March, 10, 23, 00)
// thisYear is the year component of now.
thisYear, _, _ = now.Date()
// previousYear is the year immediately before thisYear.
previousYear = thisYear - 1
)
type line struct {
line string
@ -22,45 +29,45 @@ type unsupportedLine struct {
var listTests = []line{
// UNIX ls -l style
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 pub", "pub", 0, EntryTypeFolder, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)},
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 p u b", "p u b", 0, EntryTypeFolder, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)},
{"-rw-r--r-- 1 marketwired marketwired 12016 Mar 16 2016 2016031611G087802-001.newsml", "2016031611G087802-001.newsml", 12016, EntryTypeFile, time.Date(2016, time.March, 16, 0, 0, 0, 0, time.UTC)},
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 pub", "pub", 0, EntryTypeFolder, newTime(2009, time.December, 2)},
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 p u b", "p u b", 0, EntryTypeFolder, newTime(2009, time.December, 2)},
{"-rw-r--r-- 1 marketwired marketwired 12016 Mar 16 2016 2016031611G087802-001.newsml", "2016031611G087802-001.newsml", 12016, EntryTypeFile, newTime(2016, time.March, 16)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 fileName", "fileName", 1234567, EntryTypeFile, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)},
{"lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin", "bin -> usr/bin", 0, EntryTypeLink, time.Date(thisYear, time.January, 25, 0, 17, 0, 0, time.UTC)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 fileName", "fileName", 1234567, EntryTypeFile, newTime(2009, time.December, 2)},
{"lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin", "bin -> usr/bin", 0, EntryTypeLink, newTime(thisYear, time.January, 25, 0, 17)},
// Another ls style
{"drwxr-xr-x folder 0 Aug 15 05:49 !!!-Tipp des Haus!", "!!!-Tipp des Haus!", 0, EntryTypeFolder, time.Date(thisYear, time.August, 15, 5, 49, 0, 0, time.UTC)},
{"drwxrwxrwx folder 0 Aug 11 20:32 P0RN", "P0RN", 0, EntryTypeFolder, time.Date(thisYear, time.August, 11, 20, 32, 0, 0, time.UTC)},
{"-rw-r--r-- 0 18446744073709551615 18446744073709551615 Nov 16 2006 VIDEO_TS.VOB", "VIDEO_TS.VOB", 18446744073709551615, EntryTypeFile, time.Date(2006, time.November, 16, 0, 0, 0, 0, time.UTC)},
{"drwxr-xr-x folder 0 Aug 15 05:49 !!!-Tipp des Haus!", "!!!-Tipp des Haus!", 0, EntryTypeFolder, newTime(thisYear, time.August, 15, 5, 49)},
{"drwxrwxrwx folder 0 Aug 11 20:32 P0RN", "P0RN", 0, EntryTypeFolder, newTime(thisYear, time.August, 11, 20, 32)},
{"-rw-r--r-- 0 18446744073709551615 18446744073709551615 Nov 16 2006 VIDEO_TS.VOB", "VIDEO_TS.VOB", 18446744073709551615, EntryTypeFile, newTime(2006, time.November, 16)},
// Microsoft's FTP servers for Windows
{"---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z", "ls-lR.Z", 1803128, EntryTypeFile, time.Date(thisYear, time.July, 10, 10, 18, 0, 0, time.UTC)},
{"d--------- 1 owner group 0 May 9 19:45 Softlib", "Softlib", 0, EntryTypeFolder, time.Date(thisYear, time.May, 9, 19, 45, 0, 0, time.UTC)},
{"---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z", "ls-lR.Z", 1803128, EntryTypeFile, newTime(thisYear, time.July, 10, 10, 18)},
{"d--------- 1 owner group 0 Nov 9 19:45 Softlib", "Softlib", 0, EntryTypeFolder, newTime(previousYear, time.November, 9, 19, 45)},
// WFTPD for MSDOS
{"-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp", "message.ftp", 322, EntryTypeFile, time.Date(1996, time.August, 19, 0, 0, 0, 0, time.UTC)},
{"-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp", "message.ftp", 322, EntryTypeFile, newTime(1996, time.August, 19)},
// RFC3659 format: https://tools.ietf.org/html/rfc3659#section-7
{"modify=20150813224845;perm=fle;type=cdir;unique=119FBB87U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; .", ".", 0, EntryTypeFolder, time.Date(2015, time.August, 13, 22, 48, 45, 0, time.UTC)},
{"modify=20150813224845;perm=fle;type=pdir;unique=119FBB87U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; ..", "..", 0, EntryTypeFolder, time.Date(2015, time.August, 13, 22, 48, 45, 0, time.UTC)},
{"modify=20150806235817;perm=fle;type=dir;unique=1B20F360U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; movies", "movies", 0, EntryTypeFolder, time.Date(2015, time.August, 6, 23, 58, 17, 0, time.UTC)},
{"modify=20150814172949;perm=flcdmpe;type=dir;unique=85A0C168U4;UNIX.group=0;UNIX.mode=0777;UNIX.owner=0; _upload", "_upload", 0, EntryTypeFolder, time.Date(2015, time.August, 14, 17, 29, 49, 0, time.UTC)},
{"modify=20150813175250;perm=adfr;size=951;type=file;unique=119FBB87UE;UNIX.group=0;UNIX.mode=0644;UNIX.owner=0; welcome.msg", "welcome.msg", 951, EntryTypeFile, time.Date(2015, time.August, 13, 17, 52, 50, 0, time.UTC)},
{"modify=20150813224845;perm=fle;type=cdir;unique=119FBB87U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; .", ".", 0, EntryTypeFolder, newTime(2015, time.August, 13, 22, 48, 45)},
{"modify=20150813224845;perm=fle;type=pdir;unique=119FBB87U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; ..", "..", 0, EntryTypeFolder, newTime(2015, time.August, 13, 22, 48, 45)},
{"modify=20150806235817;perm=fle;type=dir;unique=1B20F360U4;UNIX.group=0;UNIX.mode=0755;UNIX.owner=0; movies", "movies", 0, EntryTypeFolder, newTime(2015, time.August, 6, 23, 58, 17)},
{"modify=20150814172949;perm=flcdmpe;type=dir;unique=85A0C168U4;UNIX.group=0;UNIX.mode=0777;UNIX.owner=0; _upload", "_upload", 0, EntryTypeFolder, newTime(2015, time.August, 14, 17, 29, 49)},
{"modify=20150813175250;perm=adfr;size=951;type=file;unique=119FBB87UE;UNIX.group=0;UNIX.mode=0644;UNIX.owner=0; welcome.msg", "welcome.msg", 951, EntryTypeFile, newTime(2015, time.August, 13, 17, 52, 50)},
// Format and types have first letter UpperCase
{"Modify=20150813175250;Perm=adfr;Size=951;Type=file;Unique=119FBB87UE;UNIX.group=0;UNIX.mode=0644;UNIX.owner=0; welcome.msg", "welcome.msg", 951, EntryTypeFile, time.Date(2015, time.August, 13, 17, 52, 50, 0, time.UTC)},
{"Modify=20150813175250;Perm=adfr;Size=951;Type=file;Unique=119FBB87UE;UNIX.group=0;UNIX.mode=0644;UNIX.owner=0; welcome.msg", "welcome.msg", 951, EntryTypeFile, newTime(2015, time.August, 13, 17, 52, 50)},
// DOS DIR command output
{"08-07-15 07:50PM 718 Post_PRR_20150901_1166_265118_13049.dat", "Post_PRR_20150901_1166_265118_13049.dat", 718, EntryTypeFile, time.Date(2015, time.August, 7, 19, 50, 0, 0, time.UTC)},
{"08-10-15 02:04PM <DIR> Billing", "Billing", 0, EntryTypeFolder, time.Date(2015, time.August, 10, 14, 4, 0, 0, time.UTC)},
{"08-07-15 07:50PM 718 Post_PRR_20150901_1166_265118_13049.dat", "Post_PRR_20150901_1166_265118_13049.dat", 718, EntryTypeFile, newTime(2015, time.August, 7, 19, 50)},
{"08-10-15 02:04PM <DIR> Billing", "Billing", 0, EntryTypeFolder, newTime(2015, time.August, 10, 14, 4)},
// dir and file names that contain multiple spaces
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 spaces dir name", "spaces dir name", 0, EntryTypeFolder, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 file name", "file name", 1234567, EntryTypeFile, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 foo bar ", " foo bar ", 1234567, EntryTypeFile, time.Date(2009, time.December, 2, 0, 0, 0, 0, time.UTC)},
{"drwxr-xr-x 3 110 1002 3 Dec 02 2009 spaces dir name", "spaces dir name", 0, EntryTypeFolder, newTime(2009, time.December, 2)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 file name", "file name", 1234567, EntryTypeFile, newTime(2009, time.December, 2)},
{"-rwxr-xr-x 3 110 1002 1234567 Dec 02 2009 foo bar ", " foo bar ", 1234567, EntryTypeFile, newTime(2009, time.December, 2)},
// Odd link count from hostedftp.com
{"-r-------- 0 user group 65222236 Feb 24 00:39 RegularFile", "RegularFile", 65222236, EntryTypeFile, time.Date(thisYear, time.February, 24, 0, 39, 0, 0, time.UTC)},
{"-r-------- 0 user group 65222236 Feb 24 00:39 RegularFile", "RegularFile", 65222236, EntryTypeFile, newTime(thisYear, time.February, 24, 0, 39)},
}
// Not supported, we expect a specific error message
@ -77,7 +84,7 @@ var listTestsFail = []unsupportedLine{
func TestParseValidListLine(t *testing.T) {
for _, lt := range listTests {
entry, err := parseListLine(lt.line)
entry, err := parseListLine(lt.line, now)
if err != nil {
t.Errorf("parseListLine(%v) returned err = %v", lt.line, err)
continue
@ -91,7 +98,7 @@ func TestParseValidListLine(t *testing.T) {
if entry.Size != lt.size {
t.Errorf("parseListLine(%v).Size = %v, want %v", lt.line, entry.Size, lt.size)
}
if entry.Time.Unix() != lt.time.Unix() {
if !entry.Time.Equal(lt.time) {
t.Errorf("parseListLine(%v).Time = %v, want %v", lt.line, entry.Time, lt.time)
}
}
@ -99,7 +106,7 @@ func TestParseValidListLine(t *testing.T) {
func TestParseUnsupportedListLine(t *testing.T) {
for _, lt := range listTestsFail {
_, err := parseListLine(lt.line)
_, err := parseListLine(lt.line, now)
if err == nil {
t.Errorf("parseListLine(%v) expected to fail", lt.line)
}
@ -108,3 +115,53 @@ func TestParseUnsupportedListLine(t *testing.T) {
}
}
}
// TestSettime checks how Entry.setTime resolves the date columns of an
// ls-style listing against the package-level reference time now.
//
// Timestamps that carry a clock time but no year are expected to land in the
// current year, unless that would place them six months or more after now, in
// which case the previous year is expected. Timestamps with an explicit year
// are expected to be taken as written.
func TestSettime(t *testing.T) {
	tests := []struct {
		line     string
		expected time.Time
	}{
		// this year, in the past
		{"Feb 10 23:00", newTime(thisYear, time.February, 10, 23)},

		// this year, less than six months in the future
		{"Sep 10 22:59", newTime(thisYear, time.September, 10, 22, 59)},

		// previous year, otherwise it would be more than 6 months in the future
		{"Sep 10 23:00", newTime(previousYear, time.September, 10, 23)},

		// far in the future
		{"Jan 23 2019", newTime(2019, time.January, 23)},
	}

	for _, test := range tests {
		entry := &Entry{}
		// Every line in the table is well-formed, so a parse error is a
		// failure in its own right; report it instead of comparing a
		// meaningless Time value.
		if err := entry.setTime(strings.Fields(test.line), now); err != nil {
			t.Errorf("setTime(%v) returned err = %v", test.line, err)
			continue
		}

		if !entry.Time.Equal(test.expected) {
			t.Errorf("setTime(%v).Time = %v, want %v", test.line, entry.Time, test.expected)
		}
	}
}
// newTime returns the UTC instant for the given calendar date. Up to three
// optional trailing values supply the hour, minute and second, in that order;
// any that are omitted are zero. Passing more than three panics.
func newTime(year int, month time.Month, day int, hourMinSec ...int) time.Time {
	if len(hourMinSec) > 3 {
		panic("too many arguments")
	}

	// clock holds hour, minute, second; copy fills only as many slots as
	// were supplied and leaves the rest at zero.
	var clock [3]int
	copy(clock[:], hourMinSec)

	return time.Date(year, month, day, clock[0], clock[1], clock[2], 0, time.UTC)
}