Able to scrape lyrics from Google Search

This commit is contained in:
iratusmachina 2024-01-13 17:55:50 -05:00 committed by OLUWADAMILOLA OKUSANYA
parent a17fe93f2e
commit 38f64dcb92
5 changed files with 224 additions and 15 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
.env .env
bin bin
*.txt

10
README.md Normal file
View File

@ -0,0 +1,10 @@
# Lyricdownloader
This program is used to download lyrics for a song from the internet. The steps of operation are shown here:
1. It first opens a Chrome window, searches for the lyrics, and copies the lyrics returned by Google Search to a file that you specify.
2. It then tries to search for the same song using the Genius API, and to compare the Google lyrics with the Genius lyrics.
## Caveats
This program requires Chrome; that is, you must have Chrome installed to use it.

17
go.mod
View File

@ -2,4 +2,19 @@ module lyricdownloader
go 1.20 go 1.20
require github.com/joho/godotenv v1.5.1 require (
github.com/antchfx/htmlquery v1.3.0
github.com/joho/godotenv v1.5.1
github.com/playwright-community/playwright-go v0.4001.0
golang.org/x/net v0.17.0
)
require (
github.com/antchfx/xpath v1.2.4 // indirect
github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect
github.com/go-jose/go-jose/v3 v3.0.1 // indirect
github.com/go-stack/stack v1.8.1 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/text v0.13.0 // indirect
)

64
go.sum
View File

@ -1,2 +1,66 @@
github.com/antchfx/htmlquery v1.3.0 h1:5I5yNFOVI+egyia5F2s/5Do2nFWxJz41Tr3DyfKD25E=
github.com/antchfx/htmlquery v1.3.0/go.mod h1:zKPDVTMhfOmcwxheXUsx4rKJy8KEY/PU6eXr/2SebQ8=
github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/antchfx/xpath v1.2.4 h1:dW1HB/JxKvGtJ9WyVGJ0sIoEcqftV3SqIstujI+B9XY=
github.com/antchfx/xpath v1.2.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 h1:y5HC9v93H5EPKqaS1UYVg1uYah5Xf51mBfIoWehClUQ=
github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9hchkHSWYkEqJwUGisez3G1QY8Ryz0sdWrLPMGjLk=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/go-jose/go-jose/v3 v3.0.1 h1:pWmKFVtt+Jl0vBZTIpz/eAKwsm6LkIxDVVbFHKkchhA=
github.com/go-jose/go-jose/v3 v3.0.1/go.mod h1:RNkWWRld676jZEYoV3+XK8L2ZnNSvIsxFMht0mSX+u8=
github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw=
github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/mitchellh/go-ps v1.0.0 h1:i6ampVEEF4wQFF+bkYfwYgY+F/uYJDktmvLPf7qIgjc=
github.com/playwright-community/playwright-go v0.4001.0 h1:2cBiTIjCvFu7zUrZ48C0YC2DIp90Tbudueq4brUGjHM=
github.com/playwright-community/playwright-go v0.4001.0/go.mod h1:quEkYFrvvpQyGSxBjnYbGS52vrUDB2uaY1cOzkkSHCc=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190911031432-227b76d455e7/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

View File

@ -1,20 +1,118 @@
package cmdline package cmdline
import (
	"bytes"
	"flag"
	"fmt"
	"log"
	"net/url"
	"os"
	"strings"

	"github.com/antchfx/htmlquery"
	"github.com/playwright-community/playwright-go"
	"golang.org/x/net/html"
)
var ( var (
printDebug bool printDebug bool
outputFile string outputFile string
errorLog *log.Logger
mainLog *log.Logger
) )
// func searchGoogle(song string, lg *log.Logger) { func recurseNodes(top *html.Node, sb *strings.Builder) {
if top.Type == html.ElementNode && top.Data == "span" {
sb.WriteString(htmlquery.InnerText(top) + "\n")
}
for c := top.FirstChild; c != nil; c = c.NextSibling {
recurseNodes(c, sb)
}
}
// } func searchGoogle(song string) error {
runOption := &playwright.RunOptions{
SkipInstallBrowsers: true,
}
err := playwright.Install(runOption)
if err != nil {
return fmt.Errorf("could not install playwright dependencies: %v", err)
}
pw, err := playwright.Run()
if err != nil {
return fmt.Errorf("could not start playwright: %v", err)
}
defer func(pw *playwright.Playwright) error {
err := pw.Stop()
if err != nil {
return fmt.Errorf("could not stop Playwright: %v", err)
}
return nil
}(pw)
option := playwright.BrowserTypeLaunchOptions{
Channel: playwright.String("chrome"),
Headless: playwright.Bool(false),
}
browser, err := pw.Chromium.Launch(option)
if err != nil {
return fmt.Errorf("could not launch browser: %v", err)
}
defer func(browser playwright.Browser) error {
err = browser.Close()
if err != nil {
return fmt.Errorf("could not close browser: %v", err)
}
return nil
}(browser)
page, err := browser.NewPage()
if err != nil {
return fmt.Errorf("could not create page: %v", err)
}
if _, err := page.Goto(fmt.Sprintf("https://www.google.com/search?q=%ss+lyrics", song),
playwright.PageGotoOptions{
WaitUntil: playwright.WaitUntilStateLoad,
}); err != nil {
return fmt.Errorf("could not goto: %v", err)
}
err = page.Locator("body").WaitFor(playwright.LocatorWaitForOptions{
State: playwright.WaitForSelectorStateVisible,
})
if err != nil {
return fmt.Errorf("could not wait for body: %v", err)
}
html, err := page.Locator("html").InnerHTML()
if err != nil {
return fmt.Errorf("could not get innerHtml: %v", err)
}
doc, err := htmlquery.Parse(bytes.NewReader([]byte(html)))
if err != nil {
return fmt.Errorf("could not parse the innerHtml: %v", err)
}
nodes, err := htmlquery.QueryAll(doc, "//div[@data-lyricid]/div")
if err != nil {
return fmt.Errorf("could not get the nodes: %v", err)
}
var sb strings.Builder
for _, node := range nodes {
recurseNodes(node, &sb)
}
if sb.Len() > 0 {
if printDebug {
mainLog.Println("Writing lyrics from Google...")
}
filename := fmt.Sprintf("%s_google.txt", outputFile)
err = os.WriteFile(filename, []byte(sb.String()), os.ModePerm)
if err != nil {
return fmt.Errorf("could not write to %s: %v", filename, err)
}
} else {
mainLog.Println("Lyrics cannot be found...")
}
return nil
}
// func searchGenius(song string, lg *log.Logger) { // func searchGenius(song string, lg *log.Logger) {
@ -22,25 +120,28 @@ var (
func Main() int { func Main() int {
programName := os.Args[0] programName := os.Args[0]
errorLog := log.New(os.Stderr, "", 0) errorLog = log.New(os.Stderr, "", 0)
mainLog := log.New(os.Stdout, "", 0) mainLog = log.New(os.Stdout, "", 0)
flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError) flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
flags.Usage = func() { flags.Usage = func() {
out := flags.Output() out := flags.Output()
fmt.Fprintf(out, "Usage: %v [dir]\n\n", programName) fmt.Fprintf(out, "Usage: %v \n\n", programName)
fmt.Fprint(out, " [dir] is optional; if not passed, '.' is used.\n\n") fmt.Fprint(out, " This program is used to download lyrics for a song\n")
fmt.Fprint(out, " By default, the server listens on localhost:8080. Both the\n") fmt.Fprint(out, " from the internet. The steps of operation are shown here: \n\n")
fmt.Fprint(out, " host and the port are configurable with flags. Set the host\n") fmt.Fprint(out, " (a) It first opens a chrome window, searches for the lyrics \n")
fmt.Fprint(out, " to something else if you want the server to listen on a\n") fmt.Fprint(out, " , and copies the lyrics returned by Google Search to a \n")
fmt.Fprint(out, " specific network interface. Setting the port to 0 will\n") fmt.Fprint(out, " file defined by you.\n\n")
fmt.Fprint(out, " instruct the server to pick a random available port.\n\n") fmt.Fprint(out, " (b) It then tries to get search for the same song using the \n")
fmt.Fprint(out, " Genius API. It then tries to compare the lyrics with the \n")
fmt.Fprint(out, " Genius one.\n\n")
flags.PrintDefaults() flags.PrintDefaults()
} }
outputFlag := flags.String("output", "", "Lyrics filename") outputFlag := flags.String("output", "", "Optional. Lyrics filename")
verboseFlag := flags.Bool("verbose", false, "Turn on debug. Default is false.") verboseFlag := flags.Bool("verbose", false, "Optional. Turn on debug. Default is false.")
searchFlag := flags.String("search", "", "Name of song to search. If the name of the song is not a single word, put in quotes\"\"") searchFlag := flags.String("search", "", "Required. Name of song to search. If the name of the song is not a single word, put in quotes\"\"")
helpFlag := flags.Bool("help", false, "Optional. Print Usage")
flags.Parse(os.Args[1:]) flags.Parse(os.Args[1:])
@ -52,6 +153,17 @@ func Main() int {
allSetFlags := flagsSet(flags) allSetFlags := flagsSet(flags)
if allSetFlags["help"] && (allSetFlags["output"] || allSetFlags["search"] || allSetFlags["verbose"]) {
errorLog.Println("Error: if -help is set, -output, -search and -verbose must remain unset")
flags.Usage()
return 1
}
if *helpFlag {
flags.Usage()
return 0
}
songToSearch := *searchFlag songToSearch := *searchFlag
if len(songToSearch) == 0 { if len(songToSearch) == 0 {
@ -64,6 +176,7 @@ func Main() int {
outputFile = *outputFlag outputFile = *outputFlag
} else { } else {
mainLog.Printf("Using %s as the name of the file(s) for downloaded lyrics..\n", songToSearch) mainLog.Printf("Using %s as the name of the file(s) for downloaded lyrics..\n", songToSearch)
outputFile = fmt.Sprintf("%s_lyrics", songToSearch)
} }
printDebug = *verboseFlag printDebug = *verboseFlag
@ -71,6 +184,12 @@ func Main() int {
mainLog.Printf("Output flag: %s, Debug flag: %t, Search flag: %s\n", outputFile, printDebug, songToSearch) mainLog.Printf("Output flag: %s, Debug flag: %t, Search flag: %s\n", outputFile, printDebug, songToSearch)
} }
err := searchGoogle(songToSearch)
if err != nil {
errorLog.Printf("Err: %+v", err)
return 1
}
return 0 return 0
} }