157 lines
4.0 KiB
Go
157 lines
4.0 KiB
Go
|
package main
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"encoding/json"
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"net/http"
|
||
|
"net/http/httputil"
|
||
|
"os"
|
||
|
"regexp"
|
||
|
"strings"
|
||
|
"time"
|
||
|
|
||
|
"github.com/antchfx/htmlquery"
|
||
|
"github.com/gocolly/colly/v2"
|
||
|
"golang.org/x/net/html"
|
||
|
)
|
||
|
|
||
|
// Package-level state shared by the Genius helpers in this file.
var (
	// client is the HTTP client used for Genius API requests; every request
	// is bounded by a ten second timeout.
	client = &http.Client{Timeout: 10 * time.Second}

	// genuisAPI is the URL of the Genius search endpoint.
	genuisAPI = "https://api.genius.com/search"

	// GENIUS_API_TOKEN is the bearer token sent to the Genius API. It is
	// empty here; presumably it is populated elsewhere before use — confirm.
	GENIUS_API_TOKEN = ""
)
|
||
|
|
||
|
// makeRequest - generic function for making Http request
|
||
|
func makeRequest(endpoint string, apiToken string) (GeniusSearchOtherResponse, http.Header, error) {
|
||
|
var geniusApiResponse GeniusSearchOtherResponse
|
||
|
req, _ := http.NewRequest(http.MethodGet, endpoint, nil)
|
||
|
bearerHeader := fmt.Sprintf("Bearer %s", apiToken)
|
||
|
req.Header.Add("Authorization", bearerHeader)
|
||
|
|
||
|
reqDump, err := httputil.DumpRequestOut(req, true)
|
||
|
if err != nil {
|
||
|
return geniusApiResponse, nil, fmt.Errorf("error printing request: %+v", err)
|
||
|
}
|
||
|
|
||
|
if printDebug {
|
||
|
mainLog.Printf("REQUEST:\n%s\n", string(reqDump))
|
||
|
}
|
||
|
|
||
|
resp, err := client.Do(req)
|
||
|
if err != nil {
|
||
|
return geniusApiResponse, nil, fmt.Errorf("error receiving response: %+v", err)
|
||
|
}
|
||
|
defer resp.Body.Close()
|
||
|
|
||
|
respDump, err := httputil.DumpResponse(resp, true)
|
||
|
if err != nil {
|
||
|
return geniusApiResponse, nil, fmt.Errorf("error printing response: %+v", err)
|
||
|
}
|
||
|
|
||
|
if printDebug {
|
||
|
mainLog.Printf("RESPONSE:\n%s\n", string(respDump))
|
||
|
}
|
||
|
|
||
|
err = json.NewDecoder(resp.Body).Decode(&geniusApiResponse)
|
||
|
|
||
|
if err != nil {
|
||
|
return geniusApiResponse, nil, fmt.Errorf("error decoding response: %+v", err)
|
||
|
}
|
||
|
|
||
|
if printDebug {
|
||
|
mainLog.Printf("Decoded => %+v\n", geniusApiResponse)
|
||
|
}
|
||
|
|
||
|
return geniusApiResponse, resp.Header, nil
|
||
|
}
|
||
|
|
||
|
// getLyricUrl - get the url of the html page of the song to scrape
|
||
|
func getLyricUrl(searchTerm string) (string, error) {
|
||
|
ret, _, err := makeRequest(fmt.Sprintf("%s?q=%s", genuisAPI, searchTerm), GENIUS_API_TOKEN)
|
||
|
if err != nil {
|
||
|
return "", err
|
||
|
}
|
||
|
|
||
|
if len(ret.Response.Hits) == 0 {
|
||
|
return "", fmt.Errorf("there is no song on the Genius Api for the song: %s", searchTerm)
|
||
|
}
|
||
|
|
||
|
var songs []GeniusSearchResponseHit
|
||
|
for _, item := range ret.Response.Hits {
|
||
|
if item.Type == "song" {
|
||
|
songs = append(songs, item.Result)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Extract the first one as that is most of the time the correct one
|
||
|
return songs[0].URL, nil
|
||
|
}
|
||
|
|
||
|
// htmlTagPattern matches a single HTML tag: '<', any run of characters other
// than '>', then '>'. Unlike `.*?`, the negated class also spans newlines, so
// tags whose attribute values contain a line break are matched too.
var htmlTagPattern = regexp.MustCompile(`<[^>]*>`)

// stripUnneccessaryChars converts an HTML fragment into plain text.
//
// It works in three ordered steps:
//  1. every "<br/>" becomes a newline,
//  2. every remaining tag (angle brackets and their content) is removed,
//  3. HTML entities such as "&amp;" are unescaped.
//
// The order matters: line breaks must be converted before tags are removed,
// and entities are unescaped last so that an escaped "&lt;" is never mistaken
// for the start of a tag.
func stripUnneccessaryChars(nodeHtml string) string {
	withNewlines := strings.ReplaceAll(nodeHtml, "<br/>", "\n")
	withoutTags := htmlTagPattern.ReplaceAllString(withNewlines, "")
	return html.UnescapeString(withoutTags)
}
|
||
|
|
||
|
// searchGenius - print to file scraped song from Genius Search API
|
||
|
func searchGenius(searchTerm string) error {
|
||
|
var errs error
|
||
|
lyricUrl, err := getLyricUrl(searchTerm)
|
||
|
if err != nil {
|
||
|
return errors.Join(err, errs)
|
||
|
}
|
||
|
|
||
|
if printDebug {
|
||
|
mainLog.Printf("lyricUrl: %s", lyricUrl)
|
||
|
}
|
||
|
|
||
|
var sb strings.Builder
|
||
|
|
||
|
c := colly.NewCollector()
|
||
|
c.OnHTML("body", func(e *colly.HTMLElement) {
|
||
|
doc, errT := htmlquery.Parse(bytes.NewReader(e.Response.Body))
|
||
|
if errT != nil {
|
||
|
errs = fmt.Errorf("error parsing html : %+v", errT)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// fmt.Println("doc: ", htmlquery.OutputHTML(doc, true))
|
||
|
|
||
|
// Use XPath to find nodes
|
||
|
nodes, errT := htmlquery.QueryAll(doc, "//div[contains(@class, 'Lyrics__Container')]") // Example XPath query
|
||
|
if err != nil {
|
||
|
errs = fmt.Errorf("error in query: %+v", errT)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
for _, node := range nodes {
|
||
|
sb.WriteString(stripUnneccessaryChars(htmlquery.OutputHTML(node, true)))
|
||
|
}
|
||
|
|
||
|
})
|
||
|
err = c.Visit(lyricUrl)
|
||
|
if err != nil {
|
||
|
return errors.Join(err, errs)
|
||
|
}
|
||
|
|
||
|
filename := fmt.Sprintf("%s_genius.txt", searchTerm)
|
||
|
err = os.WriteFile(filename, []byte(sb.String()), os.ModePerm)
|
||
|
if err != nil {
|
||
|
g := fmt.Errorf("could not write to %s: %v", filename, err)
|
||
|
return errors.Join(errs, g)
|
||
|
}
|
||
|
|
||
|
return errs
|
||
|
}
|