lyricdownloader/usegoogle.go

120 lines
3.1 KiB
Go
Raw Permalink Normal View History

package main
import (
"bytes"
"errors"
"fmt"
"os"
"strings"
"github.com/antchfx/htmlquery"
"github.com/playwright-community/playwright-go"
"golang.org/x/net/html"
)
// recurseNodes visits top and then each of its descendants depth-first, in
// sibling order. For every element node whose tag is "span" it appends that
// node's inner text, followed by a newline, to sb. The walk continues into
// the children of a span as well, so spans nested inside other spans are
// visited in their own right.
func recurseNodes(top *html.Node, sb *strings.Builder) {
	isSpan := top.Type == html.ElementNode && top.Data == "span"
	if isSpan {
		sb.WriteString(htmlquery.InnerText(top) + "\n")
	}

	child := top.FirstChild
	for child != nil {
		recurseNodes(child, sb)
		child = child.NextSibling
	}
}
// searchGoogle scrapes the lyrics shown on a Google search results page for
// song and writes them to "<outputFile>_google.txt".
//
// Steps: start the Playwright driver (browser download is skipped), launch
// the "chrome" channel with Headless set to false, load the search page, wait
// for <body> to become visible, then collect the text of every <span> found
// beneath the direct <div> children of div[data-lyricid]. When no text is
// collected a message is logged, no file is written, and nil is returned.
//
// The returned error describes the first step that failed, wrapping the
// underlying cause. Failures while closing the browser or stopping Playwright
// are joined onto it by the deferred cleanups, which is why the result is
// named.
func searchGoogle(song string) (err error) {
	runOption := &playwright.RunOptions{
		SkipInstallBrowsers: true,
	}
	if installErr := playwright.Install(runOption); installErr != nil {
		return fmt.Errorf("could not install playwright dependencies: %w", installErr)
	}

	pw, runErr := playwright.Run()
	if runErr != nil {
		return fmt.Errorf("could not start playwright: %w", runErr)
	}
	defer func() {
		if stopErr := pw.Stop(); stopErr != nil {
			err = errors.Join(err, fmt.Errorf("could not stop Playwright: %w", stopErr))
		}
	}()

	option := playwright.BrowserTypeLaunchOptions{
		Channel:  playwright.String("chrome"),
		Headless: playwright.Bool(false),
	}
	browser, launchErr := pw.Chromium.Launch(option)
	if launchErr != nil {
		return fmt.Errorf("could not launch browser: %w", launchErr)
	}
	// Registered after the Playwright stop, so it runs first: the browser is
	// closed before the driver is shut down.
	defer func() {
		if closeErr := browser.Close(); closeErr != nil {
			err = errors.Join(err, fmt.Errorf("could not close browser: %w", closeErr))
		}
	}()

	page, pageErr := browser.NewPage()
	if pageErr != nil {
		return fmt.Errorf("could not create page: %w", pageErr)
	}

	// NOTE(review): the format string appends a literal "s" directly after
	// song ("%ss+lyrics") and inserts song without URL-escaping. Left as-is
	// because the caller's encoding of song is not visible here — confirm
	// whether both are intended.
	searchURL := fmt.Sprintf("https://www.google.com/search?q=%ss+lyrics", song)
	gotoOption := playwright.PageGotoOptions{
		WaitUntil: playwright.WaitUntilStateLoad,
	}
	if _, gotoErr := page.Goto(searchURL, gotoOption); gotoErr != nil {
		return fmt.Errorf("could not goto: %w", gotoErr)
	}

	waitErr := page.Locator("body").WaitFor(playwright.LocatorWaitForOptions{
		State: playwright.WaitForSelectorStateVisible,
	})
	if waitErr != nil {
		return fmt.Errorf("could not wait for body: %w", waitErr)
	}

	// Named pageHTML rather than html so the imported html package is not
	// shadowed inside this function.
	pageHTML, innerErr := page.Locator("html").InnerHTML()
	if innerErr != nil {
		return fmt.Errorf("could not get innerHtml: %w", innerErr)
	}

	doc, parseErr := htmlquery.Parse(bytes.NewReader([]byte(pageHTML)))
	if parseErr != nil {
		return fmt.Errorf("could not parse the innerHtml: %w", parseErr)
	}

	nodes, queryErr := htmlquery.QueryAll(doc, "//div[@data-lyricid]/div")
	if queryErr != nil {
		return fmt.Errorf("could not get the nodes: %w", queryErr)
	}

	var sb strings.Builder
	for _, node := range nodes {
		recurseNodes(node, &sb)
	}

	if sb.Len() == 0 {
		mainLog.Println("Lyrics cannot be found...")
		return nil
	}

	if printDebug {
		mainLog.Println("Writing lyrics from Google...")
	}
	filename := fmt.Sprintf("%s_google.txt", outputFile)
	// 0o644: a plain text file has no reason to be executable or
	// world-writable.
	if writeErr := os.WriteFile(filename, []byte(sb.String()), 0o644); writeErr != nil {
		// Wrap the write error itself; the named result err is still nil here.
		return fmt.Errorf("could not write to %s: %w", filename, writeErr)
	}
	return nil
}