Keep track of most recent item in the feed

And use it as termination sentinel.

Also add go module files.

Fixes #12
Fixes #15
pull/16/head
mpl 4 years ago
parent 6a2f034a9d
commit ecbc0acc43

@ -0,0 +1,8 @@
module github.com/mpl/gphotos-cdp
go 1.12
require (
github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d
github.com/chromedp/chromedp v0.4.0
)

@ -0,0 +1,16 @@
github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d h1:00kLGv5nKzpFchNhGDXDRbKtYx/WoT983Ka2t8/pzRE=
github.com/chromedp/cdproto v0.0.0-20190812224334-39ef923dcb8d/go.mod h1:0YChpVzuLJC5CPr+x3xkHN6Z8KOSXjNbL7qV8Wc4GW0=
github.com/chromedp/chromedp v0.4.0 h1:0AJC5ejETuh/6n7Tcsw4u4G0eKZkI9aVRwckWaImLUE=
github.com/chromedp/chromedp v0.4.0/go.mod h1:DC3QUn4mJ24dwjcaGQLoZrhm4X/uPHZ6spDbS2uFhm4=
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee h1:s+21KNqlpePfkah2I+gwHF8xmJWRjooY+5248k6m4A0=
github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo=
github.com/gobwas/pool v0.2.0 h1:QEmUOlnSjWtnpRGHF3SauEiOsy82Cup83Vf2LcMlnc8=
github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo=
github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
github.com/knq/sysutil v0.0.0-20181215143952-f05b59f0f307 h1:vl4eIlySbjertFaNwiMjXsGrFVK25aOWLq7n+3gh2ls=
github.com/knq/sysutil v0.0.0-20181215143952-f05b59f0f307/go.mod h1:BjPj+aVjl9FW/cCGiF3nGh5v+9Gd3VCgBQbod/GlMaQ=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e h1:hB2xlXdHp/pmPZq0y3QnmWAArdw9PqbmotexnWx/FU8=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

@ -34,7 +34,6 @@ import (
"sync"
"time"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/input"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp"
@ -101,6 +100,9 @@ type Session struct {
// really) that was downloaded. If set, it is used as a sentinel, to indicate that
// we should skip dowloading all items older than this one.
lastDone string
// firstItem is the most recent item in the feed. It is determined at the
// beginning of the run, and is used as the final sentinel.
firstItem string
}
// getLastDone returns the URL of the most recent item that was downloaded in
@ -248,6 +250,10 @@ func (s *Session) login(ctx context.Context) error {
// 2) if the last session marked what was the most recent downloaded photo, it navigates to it
// 3) otherwise it jumps to the end of the timeline (i.e. the oldest photo)
func (s *Session) firstNav(ctx context.Context) error {
if err := s.setFirstItem(ctx); err != nil {
return err
}
if *startFlag != "" {
chromedp.Navigate(*startFlag).Do(ctx)
chromedp.WaitReady("body", chromedp.ByQuery).Do(ctx)
@ -270,29 +276,43 @@ func (s *Session) firstNav(ctx context.Context) error {
return nil
}
// navToEnd waits for the page to be ready to receive scroll key events, by
// trying to select an item with the right arrow key, and then scrolls down to the
// end of the page, i.e. to the oldest items.
func navToEnd(ctx context.Context) error {
// setFirstItem looks for the first item, and sets it as s.firstItem.
// We always run it first even for code paths that might not need s.firstItem,
// because we also run it for the side-effect of waiting for the first page load to
// be done, and to be ready to receive scroll key events.
func (s *Session) setFirstItem(ctx context.Context) error {
// wait for page to be loaded, i.e. that we can make an element active by using
// the right arrow key.
for {
chromedp.KeyEvent(kb.ArrowRight).Do(ctx)
time.Sleep(tick)
var ids []cdp.NodeID
attributes := make(map[string]string)
if err := chromedp.Run(ctx,
chromedp.NodeIDs(`document.activeElement`, &ids, chromedp.ByJSPath)); err != nil {
chromedp.Attributes(`document.activeElement`, &attributes, chromedp.ByJSPath)); err != nil {
return err
}
if len(ids) > 0 {
if *verboseFlag {
log.Printf("We are ready, because element %v is selected", ids[0])
}
break
if len(attributes) == 0 {
time.Sleep(tick)
continue
}
time.Sleep(tick)
photoHref, ok := attributes["href"]
if !ok || !strings.HasPrefix(photoHref, "./photo/") {
time.Sleep(tick)
continue
}
s.firstItem = strings.TrimPrefix(photoHref, "./photo/")
break
}
if *verboseFlag {
log.Printf("Page loaded, most recent item in the feed is: %s", s.firstItem)
}
return nil
}
// navToEnd scrolls down to the end of the page, i.e. to the oldest items.
func navToEnd(ctx context.Context) error {
// try jumping to the end of the page. detect we are there and have stopped
// moving when two consecutive screenshots are identical.
var previousScr, scr []byte
@ -593,6 +613,9 @@ func (s *Session) navN(N int) func(context.Context) error {
if N > 0 && n >= N {
break
}
if strings.HasSuffix(location, s.firstItem) {
break
}
if err := navLeft(ctx); err != nil {
return fmt.Errorf("error at %v: %v", location, err)

Loading…
Cancel
Save