From 8e3455a7901aad415eded19218edbed826a2fca5 Mon Sep 17 00:00:00 2001
From: Ben Busby
Date: Fri, 26 Nov 2021 09:12:46 -0700
Subject: [PATCH] Automate updates for list of searx instances (#3)

* Create nightly update workflow for instances

A nightly GitHub Actions CI workflow has been added to fetch new
instances of supported services within Farside. Currently only Searx is
supported, but obviously others could be added if there are similarly
easy ways to fetch and filter instances programmatically.

services.json has also been updated with the initial results of the
workflow script.

* Set headers for every HTTPoison request

This serves as a workaround for bot blocking via filtron.

* Expand filtering of searx instances

New filter enforces:
- No Cloudflare
- Good TLS config
- Good HTTP header config
- Vanilla instances or forks
- Instances with 100% search success
---
 .github/workflows/update-instances.yml |  68 +++++
 config/config.exs                      |   8 +-
 lib/farside/instances.ex               |   3 +-
 services.json                          | 333 ++++++++++++-------------
 4 files changed, 243 insertions(+), 169 deletions(-)
 create mode 100644 .github/workflows/update-instances.yml

diff --git a/.github/workflows/update-instances.yml b/.github/workflows/update-instances.yml
new file mode 100644
index 0000000..d16bddd
--- /dev/null
+++ b/.github/workflows/update-instances.yml
@@ -0,0 +1,68 @@
+on:
+  schedule:
+    - cron: '0 0 * * *'
+
+jobs:
+  update-instances:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Install dependencies
+      run: sudo apt-get install -y jq
+    - name: Fetch instances
+      run: |
+        function apply_update() {
+          mv services-tmp.json services.json
+          rm -f *-tmp.json
+
+          # Ensure no trailing slashes for any instance (GNU sed: -i takes no separate suffix argument)
+          sed -i 's/\/"/"/g' services.json
+        }
+
+        # ==============================================================
+        # Git config
+        # ==============================================================
+        git config --global user.name github-actions
+        git config --global user.email 41898282+github-actions[bot]@users.noreply.github.com
+        git remote set-url origin git@github.com:benbusby/farside.git # NOTE(review): SSH remote presumably needs a deploy key on the runner - confirm
+        git checkout main
+
+        # ==============================================================
+        # Searx update
+        # ==============================================================
+        curl -s https://searx.space/data/instances.json | \
+          jq '[
+            .instances |
+            to_entries[] |
+            select(.value.network_type == "normal") |
+            select(.value.version | . != null) |
+            select(.value.version | startswith("1.0.0")) |
+            select(.value.network.asn_privacy == 0) |
+            select(.value.http.error == null) |
+            select(.value.tls.grade == "A+" or .value.tls.grade == "A") |
+            select(.value.http.grade == "A+" or .value.http.grade == "A") |
+            select(.value.html.grade == "V" or .value.html.grade == "F") |
+            .key
+          ] | sort' > searx-tmp.json
+
+        jq --slurpfile searx searx-tmp.json \
+          '( .[] | select(.type == "searx") )
+          .instances |= $searx[0]' services.json > services-tmp.json
+
+        apply_update
+
+        # ==============================================================
+        # TODO: Update instances for other services
+        # ==============================================================
+
+        # ==============================================================
+        # Push changes (branch on git's exit status: 0 means services.json is unchanged)
+        # ==============================================================
+        if git diff --quiet HEAD -- services.json; then
+          echo "No updates"
+        else
+          git add services.json
+          git commit -m '[CI] Auto update instances'
+          git push
+        fi
diff --git a/config/config.exs b/config/config.exs
index cbed1ef..36ede2f 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -8,4 +8,10 @@ config :farside,
   fallback_suffix: "-fallback",
   previous_suffix: "-previous",
   services_json: "services.json",
-  index: "index.eex"
+  index: "index.eex",
+  headers: [
+    {"User-Agent", "Mozilla/5.0 (Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0"},
+    {"Accept", "text/html"},
+    {"Accept-Language", "en-US,en;q=0.5"},
+    {"Accept-Encoding", "gzip, deflate, br"}
+  ]
diff 
--git a/lib/farside/instances.ex b/lib/farside/instances.ex index 203ddfa..a451026 100644 --- a/lib/farside/instances.ex +++ b/lib/farside/instances.ex @@ -3,6 +3,7 @@ defmodule Farside.Instances do @update_file Application.fetch_env!(:farside, :update_file) @services_json Application.fetch_env!(:farside, :services_json) @service_prefix Application.fetch_env!(:farside, :service_prefix) + @headers Application.fetch_env!(:farside, :headers) def sync() do File.rename(@update_file, "#{@update_file}-prev") @@ -21,7 +22,7 @@ defmodule Farside.Instances do System.get_env("FARSIDE_TEST") -> :good true -> - case HTTPoison.get(url) do + case HTTPoison.get(url, @headers) do {:ok, %HTTPoison.Response{status_code: 200}} -> # TODO: Add validation of results, not just status code :good diff --git a/services.json b/services.json index 995bbee..8ee082c 100644 --- a/services.json +++ b/services.json @@ -1,169 +1,168 @@ [ - { - "type": "libreddit", - "test_url": "/r/popular", - "fallback": "https://libredd.it", - "instances": [ - "https://libreddit.albony.xyz", - "https://libredd.it", - "https://libreddit.spike.codes", - "https://libreddit.dothq.co", - "https://libreddit.kavin.rocks", - "https://libreddit.40two.app", - "https://reddit.invak.id", - "https://reddit.phii.me", - "https://lr.riverside.rocks", - "https://libreddit.silkky.cloud", - "https://libreddit.database.red", - "https://libreddit.exonip.de", - "https://libreddit.domain.glass", - "https://libreddit.sugoma.tk", - "https://libreddit.trevorthalacker.com", - "https://reddit.artemislena.eu", - "https://r.nf", - "https://libreddit.awesomehub.io", - "https://libreddit.some-things.org", - "https://reddit.stuehieyr.com", - "https://lr.mint.lgbt", - "https://libreddit.alefvanoon.xyz", - "https://libreddit.igna.rocks", - "https://libreddit.autarkic.org", - "https://libreddit.flux.industries", - "https://libreddit.drivet.xyz", - "https://lr.oversold.host", - "https://libreddit.de" - ] - }, - { - "type": "bibliogram", - "test_url": 
"/u/officialrickastley", - "fallback": "https://bibliogram.art", - "instances": [ - "https://bibliogram.art", - "https://bibliogram.snopyta.org", - "https://bibliogram.pussthecat.org", - "https://bibliogram.1d4.us", - "https://insta.trom.tf", - "https://bibliogram.hamster.dance", - "https://bib.actionsack.com", - "https://biblio.alefvanoon.xyz" - ] - }, - { - "type": "invidious", - "test_url": "/watch?v=eBGIQ7ZuuiU", - "fallback": "https://invidious.snopyta.org", - "instances": [ - "https://yewtu.be", - "https://invidious.snopyta.org", - "https://invidious.kavin.rocks", - "https://vid.puffyan.org", - "https://invidious-us.kavin.rocks", - "https://inv.riverside.rocks", - "https://invidio.xamh.de", - "https://vid.mint.lgbt", - "https://invidious-jp.kavin.rocks", - "https://invidious.hub.ne.kr", - "https://yt.artemislena.eu", - "https://youtube.076.ne.jp", - "https://invidious.namazso.eu" - ] - }, - { - "type": "nitter", - "test_url": "/twitter", - "fallback": "https://nitter.net", - "instances": [ - "https://nitter.net", - "https://nitter.42l.fr", - "https://nitter.pussthecat.org", - "https://nitter.nixnet.services", - "https://nitter.fdn.fr", - "https://nitter.1d4.us", - "https://nitter.kavin.rocks", - "https://nitter.vxempire.xyz", - "https://nitter.unixfox.eu", - "https://nitter.domain.glass", - "https://nitter.eu", - "https://nitter.namazso.eu", - "https://nitter.mailstation.de", - "https://nitter.actionsack.com", - "https://nitter.cattube.org", - "https://birdsite.xanny.family", - "https://nitter.hu", - "https://nitter.exonip.de", - "https://twitr.gq", - "https://nitter.moomoo.me", - "https://bird.trom.tf", - "https://nitter.it", - "https://twitter.censors.us", - "https://nitter.grimneko.de", - "https://nitter.koyu.space", - "https://nitter.alefvanoon.xyz", - "https://nitter.ir", - "https://nitter.autarkic.org", - "https://n.0x0.st", - "https://n.hyperborea.cloud", - "https://nitter.ca", - "https://twitter.076.ne.jp", - "https://lu-nitter.resolv.ee", - 
"https://is-nitter.resolv.ee", - "https://cy-nitter.resolv.ee", - "https://tweet.lambda.dance" - ] - }, - { - "type": "whoogle", - "test_url": "/search?q=github", - "fallback": "https://search.garudalinux.org", - "instances": [ - "https://search.albony.xyz", - "https://whoogle.sdf.org", - "https://whoogle.kavin.rocks", - "https://search.garudalinux.org", - "https://whooglesearch.net", - "https://search.flawcra.cc", - "https://search.exonip.de", - "https://s.alefvanoon.xyz", - "https://search.flux.industries", - "https://www.whooglesearch.ml" - ] - }, - { - "type": "searx", - "test_url": "/search?q=github", - "fallback": "https://searx.be", - "instances": [ - "https://paulgo.io", - "https://search.asynchronousexchange.com", - "https://anon.sx", - "https://searx.be", - "https://searx.gnous.eu", - "https://xeek.com", - "https://searx.bar", - "https://sx.fedi.tech", - "https://searx.tiekoetter.com", - "https://search.disroot.org", - "https://northboot.xyz", - "https://searx.fmac.xyz", - "https://metasearch.nl", - "https://searx.nevrlands.de", - "https://search.mdosch.de", - "https://searx.rasp.fr", - "https://searx.zackptg5.com", - "https://procurx.pt", - "https://searx2.zackptg5.com", - "https://searx.pwoss.org", - "https://search.076.ne.jp/searx", - "https://darmarit.org/searx", - "https://suche.uferwerk.org", - "https://searx.nakhan.ne", - "https://suche.dasnetzundich.de", - "https://search.antonkling.se", - "https://jsearch.pw", - "https://searx.hummel-web.at", - "https://search.mdosch.de", - "https://searx.ru" - ] - } + { + "type": "libreddit", + "test_url": "/r/popular", + "fallback": "https://libredd.it", + "instances": [ + "https://libreddit.albony.xyz", + "https://libredd.it", + "https://libreddit.spike.codes", + "https://libreddit.dothq.co", + "https://libreddit.kavin.rocks", + "https://libreddit.40two.app", + "https://reddit.invak.id", + "https://reddit.phii.me", + "https://lr.riverside.rocks", + "https://libreddit.silkky.cloud", + 
"https://libreddit.database.red", + "https://libreddit.exonip.de", + "https://libreddit.domain.glass", + "https://libreddit.sugoma.tk", + "https://libreddit.trevorthalacker.com", + "https://reddit.artemislena.eu", + "https://r.nf", + "https://libreddit.awesomehub.io", + "https://libreddit.some-things.org", + "https://reddit.stuehieyr.com", + "https://lr.mint.lgbt", + "https://libreddit.alefvanoon.xyz", + "https://libreddit.igna.rocks", + "https://libreddit.autarkic.org", + "https://libreddit.flux.industries", + "https://libreddit.drivet.xyz", + "https://lr.oversold.host", + "https://libreddit.de" + ] + }, + { + "type": "bibliogram", + "test_url": "/u/officialrickastley", + "fallback": "https://bibliogram.art", + "instances": [ + "https://bibliogram.art", + "https://bibliogram.snopyta.org", + "https://bibliogram.pussthecat.org", + "https://bibliogram.1d4.us", + "https://insta.trom.tf", + "https://bibliogram.hamster.dance", + "https://bib.actionsack.com", + "https://biblio.alefvanoon.xyz" + ] + }, + { + "type": "invidious", + "test_url": "/watch?v=eBGIQ7ZuuiU", + "fallback": "https://invidious.snopyta.org", + "instances": [ + "https://yewtu.be", + "https://invidious.snopyta.org", + "https://invidious.kavin.rocks", + "https://vid.puffyan.org", + "https://invidious-us.kavin.rocks", + "https://inv.riverside.rocks", + "https://invidio.xamh.de", + "https://vid.mint.lgbt", + "https://invidious-jp.kavin.rocks", + "https://invidious.hub.ne.kr", + "https://yt.artemislena.eu", + "https://youtube.076.ne.jp", + "https://invidious.namazso.eu" + ] + }, + { + "type": "nitter", + "test_url": "/twitter", + "fallback": "https://nitter.net", + "instances": [ + "https://nitter.net", + "https://nitter.42l.fr", + "https://nitter.pussthecat.org", + "https://nitter.nixnet.services", + "https://nitter.fdn.fr", + "https://nitter.1d4.us", + "https://nitter.kavin.rocks", + "https://nitter.vxempire.xyz", + "https://nitter.unixfox.eu", + "https://nitter.domain.glass", + "https://nitter.eu", + 
"https://nitter.namazso.eu", + "https://nitter.mailstation.de", + "https://nitter.actionsack.com", + "https://nitter.cattube.org", + "https://birdsite.xanny.family", + "https://nitter.hu", + "https://nitter.exonip.de", + "https://twitr.gq", + "https://nitter.moomoo.me", + "https://bird.trom.tf", + "https://nitter.it", + "https://twitter.censors.us", + "https://nitter.grimneko.de", + "https://nitter.koyu.space", + "https://nitter.alefvanoon.xyz", + "https://nitter.ir", + "https://nitter.autarkic.org", + "https://n.0x0.st", + "https://n.hyperborea.cloud", + "https://nitter.ca", + "https://twitter.076.ne.jp", + "https://lu-nitter.resolv.ee", + "https://is-nitter.resolv.ee", + "https://cy-nitter.resolv.ee", + "https://tweet.lambda.dance" + ] + }, + { + "type": "whoogle", + "test_url": "/search?q=github", + "fallback": "https://search.garudalinux.org", + "instances": [ + "https://search.albony.xyz", + "https://whoogle.sdf.org", + "https://whoogle.kavin.rocks", + "https://search.garudalinux.org", + "https://whooglesearch.net", + "https://search.flawcra.cc", + "https://search.exonip.de", + "https://s.alefvanoon.xyz", + "https://search.flux.industries", + "https://www.whooglesearch.ml" + ] + }, + { + "type": "searx", + "test_url": "/search?q=github", + "fallback": "https://searx.be", + "instances": [ + "https://anon.sx", + "https://northboot.xyz", + "https://paulgo.io", + "https://procurx.pt", + "https://s.zhaocloud.net", + "https://search.asynchronousexchange.com", + "https://search.bluelock.org", + "https://search.mdosch.de", + "https://searx.bar", + "https://searx.be", + "https://searx.divided-by-zero.eu", + "https://searx.fmac.xyz", + "https://searx.hummel-web.at", + "https://searx.nevrlands.de", + "https://searx.prvcy.eu", + "https://searx.rasp.fr", + "https://searx.ru", + "https://searx.silkky.cloud", + "https://searx.sp-codes.de", + "https://searx.stuehieyr.com", + "https://searx.theanonymouse.xyz", + "https://searx.tiekoetter.com", + "https://searx.tux.land", + 
"https://searx.tuxcloud.net", + "https://searx.webheberg.info", + "https://searx.xyz", + "https://searx2.zackptg5.com", + "https://swag.pw", + "https://sx.fedi.tech" + ] + } ]