diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml index 7a46257..20a15a0 100644 --- a/.github/workflows/build-docker.yml +++ b/.github/workflows/build-docker.yml @@ -7,55 +7,105 @@ on: branches: - master +concurrency: + group: docker-publish-${{ github.ref }} + cancel-in-progress: true + +env: + IMAGE: zedeus/nitter + jobs: tests: uses: ./.github/workflows/run-tests.yml secrets: inherit - build-docker-amd64: + build: needs: [tests] - runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + include: + - runner: ubuntu-24.04 + platform: linux/amd64 + - runner: ubuntu-24.04-arm + platform: linux/arm64 + runs-on: ${{ matrix.runner }} steps: + - name: Prepare platform name + run: echo "PLATFORM_PAIR=${platform//\//-}" >> "$GITHUB_ENV" + env: + platform: ${{ matrix.platform }} + - uses: actions/checkout@v6 + - name: Set up Docker Buildx - id: buildx uses: docker/setup-buildx-action@v3 with: version: latest + - name: Login to DockerHub uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - name: Build and push AMD64 Docker image - uses: docker/build-push-action@v3 + + - name: Build and push by digest + id: build + uses: docker/build-push-action@v6 with: context: . file: ./Dockerfile - platforms: linux/amd64 - push: true - tags: zedeus/nitter:latest,zedeus/nitter:${{ github.sha }} + platforms: ${{ matrix.platform }} + outputs: type=image,name=${{ env.IMAGE }},push-by-digest=true,name-canonical=true,push=true + provenance: false + sbom: false - build-docker-arm64: - needs: [tests] - runs-on: ubuntu-24.04-arm + - name: Export digest + run: | + mkdir -p "${{ runner.temp }}/digests" + digest="${{ steps.build.outputs.digest }}" + touch "${{ runner.temp }}/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ env.PLATFORM_PAIR }} + path: ${{ runner.temp }}/digests/* + if-no-files-found: error + retention-days: 1 + + # Combine the per-arch digests into one multi-arch manifest so that + # `docker pull zedeus/nitter:latest` serves the right image on any CPU. + merge: + needs: [build] + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v6 + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: ${{ runner.temp }}/digests + pattern: digests-* + merge-multiple: true + - name: Set up Docker Buildx - id: buildx uses: docker/setup-buildx-action@v3 with: version: latest + - name: Login to DockerHub uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - name: Build and push ARM64 Docker image - uses: docker/build-push-action@v3 - with: - context: . - file: ./Dockerfile.arm64 - platforms: linux/arm64 - push: true - tags: zedeus/nitter:latest-arm64,zedeus/nitter:${{ github.sha }}-arm64 + + - name: Create manifest list and push + working-directory: ${{ runner.temp }}/digests + run: | + docker buildx imagetools create \ + -t ${{ env.IMAGE }}:latest \ + -t ${{ env.IMAGE }}:latest-arm64 \ + -t ${{ env.IMAGE }}:${{ github.sha }} \ + $(printf '${{ env.IMAGE }}@sha256:%s ' *) + + - name: Inspect image + run: docker buildx imagetools inspect ${{ env.IMAGE }}:${{ github.sha }} diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index f70e8e5..dcfff52 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -32,10 +32,12 @@ jobs: id: cache-nimble uses: actions/cache@v5 with: - path: ~/.nimble - key: ${{ matrix.nim }}-nimble-v3-${{ hashFiles('*.nimble') }} + path: | + ~/.nimble/pkgcache + ~/.nimble/packages_official.json + key: ${{ matrix.nim }}-nimble-v6-${{ hashFiles('*.nimble') }} restore-keys: | - ${{ matrix.nim }}-nimble-v3- + ${{ matrix.nim }}-nimble-v6- - name: Setup Nim uses: jiro4989/setup-nim-action@v2 @@ -103,10 +105,12 @@ jobs: - name: Cache Nimble Dependencies uses: actions/cache@v5 with: - path: ~/.nimble - key: 2.2.x-nimble-v3-${{ hashFiles('*.nimble') }} + path: | + ~/.nimble/pkgcache + ~/.nimble/packages_official.json + key: 2.2.x-nimble-v6-${{ hashFiles('*.nimble') }} restore-keys: | - 2.2.x-nimble-v3- + 2.2.x-nimble-v6- - name: Setup Nim uses: jiro4989/setup-nim-action@v2 @@ -115,6 +119,9 @@ jobs: use-nightlies: true repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Install Nimble dependencies + run: nimble install -y --depsOnly + - name: Download 2.2.x build artifact uses: actions/download-artifact@v4 with: @@ -130,10 +137,8 @@ jobs: sed -i 's/enableDebug = false/enableDebug = true/g' nitter.conf sed -i 's/maxRetries = 1/maxRetries = 10/g' nitter.conf - # Run both Nimble tasks concurrently - nim r tools/rendermd.nim & - nim r tools/gencss.nim & - wait + nim r tools/rendermd.nim + nim r tools/gencss.nim echo '${{ secrets.SESSIONS }}' | head -n1 echo '${{ secrets.SESSIONS }}' > ./sessions.jsonl diff --git a/.gitignore b/.gitignore index 09bdaa4..2e52163 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ sessions.json* dump.rdb *.bak /tools/*.json* +nimbledeps/ +nimble.paths +nimble.develop diff --git a/Dockerfile b/Dockerfile index ab442ba..251b63a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nimlang/nim:2.2.0-alpine-regular as nim +FROM nimlang/nim:2.2.6-alpine-regular as nim LABEL maintainer="setenforce@protonmail.com" RUN apk --no-cache add libsass-dev pcre @@ -15,7 +15,7 @@ RUN nimble build -d:danger -d:lto -d:strip --mm:refc \ FROM alpine:latest WORKDIR /src/ -RUN apk --no-cache add pcre ca-certificates +RUN apk --no-cache add pcre ca-certificates openssl COPY --from=nim /src/nitter/nitter ./ COPY --from=nim /src/nitter/nitter.example.conf ./nitter.conf COPY --from=nim /src/nitter/public ./public diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 deleted file mode 100644 index 46352c7..0000000 --- a/Dockerfile.arm64 +++ /dev/null @@ -1,25 +0,0 @@ -FROM alpine:3.20.6 as nim -LABEL maintainer="setenforce@protonmail.com" - -RUN apk --no-cache add libsass-dev pcre gcc git libc-dev nim nimble - -WORKDIR /src/nitter - -COPY nitter.nimble . -RUN nimble install -y --depsOnly - -COPY . . -RUN nimble build -d:danger -d:lto -d:strip --mm:refc \ - && nimble scss \ - && nimble md - -FROM alpine:3.20.6 -WORKDIR /src/ -RUN apk --no-cache add pcre ca-certificates openssl -COPY --from=nim /src/nitter/nitter ./ -COPY --from=nim /src/nitter/nitter.example.conf ./nitter.conf -COPY --from=nim /src/nitter/public ./public -EXPOSE 8080 -RUN adduser -h /src/ -D -s /bin/sh nitter -USER nitter -CMD ./nitter diff --git a/README.md b/README.md index 05c2be4..0b0a14f 100644 --- a/README.md +++ b/README.md @@ -104,9 +104,9 @@ along with the scss and md files. # su nitter $ git clone https://github.com/zedeus/nitter $ cd nitter -$ nimble build -d:danger --mm:refc -$ nimble scss -$ nimble md +$ nimble -l build -d:danger --mm:refc +$ nimble -l scss +$ nimble -l md $ cp nitter.example.conf nitter.conf ``` @@ -123,12 +123,23 @@ performance reasons. Page for the Docker image: https://hub.docker.com/r/zedeus/nitter -#### NOTE: For ARM64 support, please use the separate ARM64 docker image: [`zedeus/nitter:latest-arm64`](https://hub.docker.com/r/zedeus/nitter/tags). +#### NOTE: The published image is multi-arch — `zedeus/nitter:latest` runs natively on both `amd64` and `arm64`. To run Nitter with Docker, you'll need to install and run Redis separately before you can run the container. See below for how to also run Redis using Docker. +First create your config file. The Docker commands mount it into the container, +so it has to exist on the host beforehand. If you've cloned the repo: + +```bash +cp nitter.example.conf nitter.conf +``` + +If you're using the prebuilt image without a local clone, download +[`nitter.example.conf`](https://raw.githubusercontent.com/zedeus/nitter/master/nitter.example.conf) +and save it as `nitter.conf` instead. + To build and run Nitter in Docker: ```bash @@ -136,8 +147,6 @@ docker build -t nitter:latest . docker run -v $(pwd)/nitter.conf:/src/nitter.conf -d --network host nitter:latest ``` -Note: For ARM64, use this Dockerfile: [`Dockerfile.arm64`](https://github.com/zedeus/nitter/blob/master/Dockerfile.arm64). - A prebuilt Docker image is provided as well: ```bash @@ -151,8 +160,11 @@ Change `redisHost` from `localhost` to `nitter-redis` in `nitter.conf`, then run docker-compose up -d ``` -Note the Docker commands expect a `nitter.conf` file in the directory you run -them. +Note the Docker commands mount `nitter.conf` (and `sessions.jsonl` for +docker-compose) from the directory you run them in. If a mounted file doesn't +exist, Docker silently creates a directory in its place and the container fails +with `not a directory: Are you trying to mount a directory onto a file`. Remove +that directory and create the file as shown above. ### systemd diff --git a/config.nims b/config.nims index 4a7af27..3ee4842 100644 --- a/config.nims +++ b/config.nims @@ -11,3 +11,7 @@ warning("HoleEnumConv", off) hint("XDeclaredButNotUsed", off) hint("XCannotRaiseY", off) hint("User", off) +# begin Nimble config (version 2) +when withDir(thisDir(), system.fileExists("nimble.paths")): + include "nimble.paths" +# end Nimble config diff --git a/nitter.nimble b/nitter.nimble index 8e17353..b36f498 100644 --- a/nitter.nimble +++ b/nitter.nimble @@ -11,19 +11,18 @@ bin = @["nitter"] # Dependencies requires "nim >= 2.0.0" -requires "jester#baca3f" -requires "karax#5cf360c" -requires "sass#7dfdd03" -requires "nimcrypto#a079df9" -requires "markdown#158efe3" +requires "jester == 0.6.0" +requires "karax == 1.5.0" +requires "sass == 0.2.0" +requires "nimcrypto == 0.7.3" +requires "markdown == 0.8.8" requires "packedjson#9e6fbb6" -requires "supersnappy#6c94198" -requires "https://github.com/zedeus/redpool#8b7c1db" -requires "https://github.com/zedeus/redis#d0a0e6f" -requires "zippy#ca5989a" -requires "flatty#e668085" -requires "jsony#1de1f08" -requires "oauth#b8c163b" +requires "supersnappy == 2.1.4" +requires "redpool == 0.2.2" +requires "zippy == 0.10.19" +requires "flatty == 0.4.0" +requires "jsony == 1.1.6" +requires "oauth == 0.11" # Tasks diff --git a/src/api.nim b/src/api.nim index c66ca3a..404a01e 100644 --- a/src/api.nim +++ b/src/api.nim @@ -11,11 +11,11 @@ proc genParams(variables: string; fieldToggles = ""): seq[(string, string)] = if fieldToggles.len > 0: result.add ("fieldToggles", fieldToggles) -proc apiUrl(endpoint, variables: string; fieldToggles = ""): ApiUrl = - return ApiUrl(endpoint: endpoint, params: genParams(variables, fieldToggles)) +proc apiUrl(endpoint, variables: string; fieldToggles = ""; skipTid = false): ApiUrl = + return ApiUrl(endpoint: endpoint, params: genParams(variables, fieldToggles), skipTid: skipTid) -proc apiReq(endpoint, variables: string; fieldToggles = ""): ApiReq = - let url = apiUrl(endpoint, variables, fieldToggles) +proc apiReq(endpoint, variables: string; fieldToggles = ""; skipTid = false): ApiReq = + let url = apiUrl(endpoint, variables, fieldToggles, skipTid) return ApiReq(cookie: url, oauth: url) proc mediaUrl(id, cursor: string; count=20): ApiReq = @@ -32,12 +32,7 @@ proc userTweetsUrl(id: string; cursor: string): ApiReq = # ) proc userTweetsAndRepliesUrl(id: string; cursor: string): ApiReq = - return apiReq(graphUserTweetsAndRepliesV2, restIdVars % [id, cursor, "20"]) - #let cookieVars = userTweetsAndRepliesVars % [id, cursor] - # result = ApiReq( - # cookie: apiUrl(graphUserTweetsAndReplies, cookieVars, userTweetsFieldToggles), - # oauth: apiUrl(graphUserTweetsAndRepliesV2, restIdVars % [id, cursor, "20"]) - # ) + return apiReq(graphUserTweetsAndRepliesV2, restIdVars % [id, cursor, "20"], skipTid=true) proc tweetDetailUrl(id: string; cursor: string): ApiReq = return apiReq(graphTweet, tweetVars % [id, cursor]) diff --git a/src/apiutils.nim b/src/apiutils.nim index bf6c4b9..8e1a852 100644 --- a/src/apiutils.nim +++ b/src/apiutils.nim @@ -1,6 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-only import httpclient, asyncdispatch, options, strutils, uri, times, math, tables -import jsony, packedjson, zippy, oauth1 +import jsony, packedjson, zippy, oauth/oauth1 import types, auth, consts, parserutils, http_pool, tid import experimental/types/common @@ -63,7 +63,7 @@ proc getOauthHeader(url, oauthToken, oauthTokenSecret: string): string = proc getCookieHeader(authToken, ct0: string): string = "auth_token=" & authToken & "; ct0=" & ct0 -proc genHeaders*(session: Session, url: Uri): Future[HttpHeaders] {.async.} = +proc genHeaders*(session: Session, url: Uri, skipTid: bool): Future[HttpHeaders] {.async.} = result = newHttpHeaders({ "accept": "*/*", "accept-encoding": "gzip", @@ -91,7 +91,7 @@ proc genHeaders*(session: Session, url: Uri): Future[HttpHeaders] {.async.} = result["sec-fetch-dest"] = "empty" result["sec-fetch-mode"] = "cors" result["sec-fetch-site"] = "same-origin" - if disableTid or "/1.1/" in url.path: + if disableTid or skipTid or "/1.1/" in url.path: result["authorization"] = bearerToken2 else: result["authorization"] = bearerToken @@ -115,7 +115,10 @@ template fetchImpl(result, fetchBody) {.dirty.} = try: var resp: AsyncResponse - let headers = await genHeaders(session, url) + let skipTid = case session.kind + of oauth: req.oauth.skipTid + of cookie: req.cookie.skipTid + let headers = await genHeaders(session, url, skipTid) pool.use(headers): template getContent = @@ -182,10 +185,12 @@ template fetchImpl(result, fetchBody) {.dirty.} = template retry(bod) {.dirty.} = var session: Session + var retrySuccess = false for i in 0 ..< maxRetries: try: session = nil bod + retrySuccess = true break except RateLimitError: let api = if session.isNil: req.cookie.endpoint @@ -199,6 +204,8 @@ template retry(bod) {.dirty.} = session = nil if retryDelayMs > 0: await sleepAsync(retryDelayMs) + if not retrySuccess: + raise rateLimitError() proc fetch*(req: ApiReq): Future[JsonNode] {.async.} = retry: diff --git a/src/consts.nim b/src/consts.nim index f903528..beefa57 100644 --- a/src/consts.nim +++ b/src/consts.nim @@ -104,7 +104,7 @@ const restIdVars* = """{ "rest_id": "$1", $2 "count": $3 -}""" +}""".replace(" ", "").replace("\n", "") userMediaVars* = """{ "userId": "$1", $2 diff --git a/src/formatters.nim b/src/formatters.nim index aef1c12..958e518 100644 --- a/src/formatters.nim +++ b/src/formatters.nim @@ -140,25 +140,30 @@ proc pageDesc*(user: User): string = "The latest tweets from " & user.fullname proc getJoinDate*(user: User): string = + if user.joinDate.year == 0: return "" user.joinDate.format("'Joined' MMMM YYYY") proc getJoinDateFull*(user: User): string = + if user.joinDate.year == 0: return "" user.joinDate.format("h:mm tt - d MMM YYYY") proc getTime*(tweet: Tweet): string = + if tweet.time.year == 0: return "" tweet.time.format("MMM d', 'YYYY' · 'h:mm tt' UTC'") proc getRfc822Time*(tweet: Tweet): string = + if tweet.time.year == 0: return "" tweet.time.format("ddd', 'dd MMM yyyy HH:mm:ss 'GMT'") -proc getShortTime*(tweet: Tweet): string = +proc getShortTime*(time: DateTime): string = + if time.year == 0: return "" let now = now() - let since = now - tweet.time + let since = now - time - if now.year != tweet.time.year: - result = tweet.time.format("d MMM yyyy") + if now.year != time.year: + result = time.format("d MMM yyyy") elif since.inDays >= 1: - result = tweet.time.format("MMM d") + result = time.format("MMM d") elif since.inHours >= 1: result = $since.inHours & "h" elif since.inMinutes >= 1: @@ -168,6 +173,9 @@ proc getShortTime*(tweet: Tweet): string = else: result = "now" +proc getShortTime*(tweet: Tweet): string = + getShortTime(tweet.time) + proc getDuration*(ms: int): string = let sec = int(round(ms / 1000)) diff --git a/src/nitter.nim b/src/nitter.nim index d1c0ef1..685b608 100644 --- a/src/nitter.nim +++ b/src/nitter.nim @@ -2,7 +2,7 @@ import asyncdispatch, strformat, logging from net import Port from htmlgen import a -from os import getEnv +from os import getEnv, normalizedPath import jester @@ -63,12 +63,17 @@ createDebugRouter(cfg) settings: port = Port(cfg.port) - staticDir = cfg.staticDir + staticDir = normalizedPath(cfg.staticDir) bindAddr = cfg.address reusePort = true + maxBody = 64 * 1024 routes: before: + # Reject malformed paths + if request.path.len == 0 or request.path[0] != '/': + halt Http400 + # skip all file URLs cond "." notin request.path applyUrlPrefs() diff --git a/src/parser.nim b/src/parser.nim index 902aebd..78d40f7 100644 --- a/src/parser.nim +++ b/src/parser.nim @@ -1,5 +1,5 @@ # SPDX-License-Identifier: AGPL-3.0-only -import strutils, options, times, math, tables +import strutils, options, times, math, tables, uri import packedjson, packedjson/deserialiser import types, parserutils, utils import experimental/parser/unifiedcard @@ -229,6 +229,10 @@ proc parseLegacyMediaEntities(js: JsonNode; result: var Tweet) = result.attribution = some(parseUser(user)) else: result.attribution = some(parseGraphUser(user)) + # Set attribution link from expanded_url (strip /video/N suffix) + let expanded = m{"expanded_url"}.getStr + if expanded.len > 0: + result.attributionLink = expanded.parseUri.path.replace("/video/1", "") of "animated_gif": result.media.addMedia(Gif( url: m{"video_info", "variants"}[0]{"url"}.getImageStr, @@ -237,11 +241,6 @@ proc parseLegacyMediaEntities(js: JsonNode; result: var Tweet) = )) else: discard - with url, m{"url"}: - if result.text.endsWith(url.getStr): - result.text.removeSuffix(url.getStr) - result.text = result.text.strip() - proc parseMediaEntities(js: JsonNode; result: var Tweet) = with mediaEntities, js{"media_entities"}: var parsedMedia: MediaEntities @@ -266,11 +265,9 @@ proc parseMediaEntities(js: JsonNode; result: var Tweet) = # Parse source user for video attribution with sourceUser, mediaEntity{"source_user_results", "result"}: if result.attribution.isNone: - let - expanded = mediaEntity{"expanded_url"}.getStr - pathStart = expanded.find('/', expanded.find("://") + 3) - if pathStart >= 0: - result.attributionLink = expanded[pathStart .. ^1].replace("/video/1", "") + let expanded = mediaEntity{"expanded_url"}.getStr + if expanded.len > 0: + result.attributionLink = expanded.parseUri.path.replace("/video/1", "") result.attribution = some(User( id: sourceUser{"rest_id"}.getStr, fullname: sourceUser{"core", "name"}.getStr, @@ -284,23 +281,9 @@ proc parseMediaEntities(js: JsonNode; result: var Tweet) = )) else: discard - if "expanded_url" in mediaEntity: - let expandedUrl = js.getExpandedUrl - if result.text.endsWith(expandedUrl): - result.text.removeSuffix(expandedUrl) - result.text = result.text.strip() - if mediaEntities.len > 0 and parsedMedia.len == mediaEntities.len: result.media = parsedMedia - # Remove media URLs from text - with mediaList, js{"legacy", "entities", "media"}: - for url in mediaList: - let expandedUrl = url.getExpandedUrl - if result.text.endsWith(expandedUrl): - result.text.removeSuffix(expandedUrl) - result.text = result.text.strip() - proc parsePromoVideo(js: JsonNode): Video = result = Video( thumb: js{"player_image_large"}.getImageVal, @@ -521,7 +504,7 @@ proc parseGraphTweet*(js: JsonNode): Tweet = "binding_values": %bindingObj } - var replyId = 0 + var replyId: int64 = 0 with restId, js{"reply_to_results", "rest_id"}: replyId = restId.getId @@ -555,6 +538,10 @@ proc parseGraphTweet*(js: JsonNode): Tweet = elif name.len > 0 and jsCard{"binding_values"}.notNull: result.card = some parseCard(jsCard, js{"url_entities"}) + parseMediaEntities(js, result) + if result.attribution.isNone: + parseLegacyMediaEntities(js{"legacy"}, result) + result.expandTweetEntitiesV2(js) # Strip video source URL from text (for videos from other tweets) @@ -585,6 +572,14 @@ proc parseGraphTweet*(js: JsonNode): Tweet = parseMediaEntities(js, result) + # Hide card if it's redundant with attribution (same video shown via embed) + if result.attribution.isSome and result.card.isSome: + let cardUri = get(result.card).url.parseUri + if cardUri.isTwitterUrl: + let cardPath = cardUri.path.replace("/video/1", "") + if cardPath.len > 0 and cardPath == result.attributionLink: + get(result.card).kind = hidden + # Handle retweets - check both legacy and top-level paths with reposts, js{"legacy", "repostedStatusResults"}: with rt, reposts{"result"}: diff --git a/src/parserutils.nim b/src/parserutils.nim index 8d6ea2e..860c9d2 100644 --- a/src/parserutils.nim +++ b/src/parserutils.nim @@ -202,19 +202,32 @@ proc extractHashtags(result: var seq[ReplaceSlice]; js: JsonNode) = proc replacedWith(runes: seq[Rune]; repls: openArray[ReplaceSlice]; textSlice: Slice[int]): string = + let + runeLen = runes.len + safeStart = max(0, textSlice.a) + safeEnd = min(runeLen, textSlice.b) + + var validRepls: seq[ReplaceSlice] + for rep in repls: + if rep.slice.a >= 0 and rep.slice.b >= 0 and rep.slice.b < runeLen and rep.slice.a <= rep.slice.b: + validRepls.add rep + template extractLowerBound(i: int; idx): int = - if i > 0: repls[idx].slice.b.succ else: textSlice.a + if i > 0: min(validRepls[idx].slice.b.succ, runeLen) else: safeStart result = newStringOfCap(runes.len) - for i, rep in repls: - result.add $runes[extractLowerBound(i, i - 1) ..< rep.slice.a] + for i, rep in validRepls: + let lower = extractLowerBound(i, i - 1) + if lower < rep.slice.a: + result.add $runes[lower ..< rep.slice.a] case rep.kind of rkHashtag: - let - name = $runes[rep.slice.a.succ .. rep.slice.b] - symbol = $runes[rep.slice.a] - result.add a(symbol & name, href = "/search?f=tweets&q=%23" & name) + if rep.slice.a.succ <= rep.slice.b: + let + name = $runes[rep.slice.a.succ .. rep.slice.b] + symbol = $runes[rep.slice.a] + result.add a(symbol & name, href = "/search?f=tweets&q=%23" & name) of rkMention: result.add a($runes[rep.slice], href = rep.url, title = rep.display) of rkUrl: @@ -222,8 +235,8 @@ proc replacedWith(runes: seq[Rune]; repls: openArray[ReplaceSlice]; of rkRemove: discard - let rest = extractLowerBound(repls.len, ^1) ..< textSlice.b - if rest.a <= rest.b: + let rest = extractLowerBound(validRepls.len, ^1) ..< safeEnd + if rest.a >= 0 and rest.a <= rest.b and rest.b <= runeLen: result.add $runes[rest] proc deduplicate(s: var seq[ReplaceSlice]) = @@ -326,7 +339,8 @@ proc expandTweetEntities*(tweet: Tweet; js: JsonNode) = replyTo = reply.getStr tweet.reply.add replyTo - tweet.expandTextEntities(entities, tweet.text, textSlice, replyTo, hasQuote or hasJobCard) + tweet.expandTextEntities(entities, tweet.text, textSlice, replyTo, + hasQuote or hasJobCard) proc expandTextEntitiesV2(tweet: Tweet; js: JsonNode; text: string; textSlice: Slice[int]; hasRedundantLink=false) = @@ -377,16 +391,19 @@ proc expandTweetEntitiesV2*(tweet: Tweet; js: JsonNode) = textSlice = textRange{0}.getInt .. textRange{1}.getInt hasQuote = "quoted_tweet_results" in js hasJobCard = tweet.card.isSome and get(tweet.card).kind == jobDetails + hasAttribution = tweet.attribution.isSome - tweet.expandTextEntitiesV2(js, tweet.text, textSlice, hasQuote or hasJobCard) + tweet.expandTextEntitiesV2(js, tweet.text, textSlice, + hasQuote or hasJobCard or hasAttribution) proc expandNoteTweetEntities*(tweet: Tweet; js: JsonNode) = let entities = ? js{"entity_set"} text = js{"text"}.getStr.multiReplace(("<", unicodeOpen), (">", unicodeClose)) textSlice = 0..text.runeLen + hasAttribution = tweet.attribution.isSome - tweet.expandTextEntities(entities, text, textSlice) + tweet.expandTextEntities(entities, text, textSlice, hasRedundantLink=hasAttribution) tweet.text = tweet.text.multiReplace((unicodeOpen, xmlOpen), (unicodeClose, xmlClose)) diff --git a/src/redis_cache.nim b/src/redis_cache.nim index bfd271f..e503e46 100644 --- a/src/redis_cache.nim +++ b/src/redis_cache.nim @@ -144,9 +144,10 @@ proc getCachedUsername*(userId: string): Future[string] {.async.} = else: let user = await getGraphUserById(userId) result = user.username - await setEx(key, baseCacheTime, result) - if result.len > 0 and user.id.len > 0: - await all(cacheUserId(result, user.id), cache(user)) + if result.len > 0: + await setEx(key, baseCacheTime, result) + if user.id.len > 0: + await all(cacheUserId(result, user.id), cache(user)) # proc getCachedTweet*(id: int64): Future[Tweet] {.async.} = # if id == 0: return diff --git a/src/routes/router_utils.nim b/src/routes/router_utils.nim index 379280c..8a26193 100644 --- a/src/routes/router_utils.nim +++ b/src/routes/router_utils.nim @@ -8,8 +8,9 @@ export utils, prefs, types, uri template savePref*(pref, value: string; req: Request; expire=false) = if not expire or pref in cookies(req): + let sameSite = if cfg.useHttps: None else: Lax setCookie(pref, value, daysForward(when expire: -10 else: 360), - httpOnly=true, secure=cfg.useHttps, sameSite=None, path="/") + httpOnly=true, secure=cfg.useHttps, sameSite=sameSite, path="/") template requestPrefs*(): untyped {.dirty.} = getPrefs(cookies(request), params(request)) diff --git a/src/types.nim b/src/types.nim index c755589..3604efb 100644 --- a/src/types.nim +++ b/src/types.nim @@ -16,6 +16,7 @@ type ApiUrl* = object endpoint*: string params*: seq[(string, string)] + skipTid*: bool ApiReq* = object oauth*: ApiUrl diff --git a/temp b/temp new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_card.py b/tests/test_card.py index f0a916c..16863ea 100644 --- a/tests/test_card.py +++ b/tests/test_card.py @@ -14,7 +14,7 @@ card = [ 'gist.github.com', True], ['NASA/status/2061872347477418301', - 'Nancy Grace Roman Space Telescope Mission - NASA Science', + 'Nancy Grace Roman Space Telescope - NASA Science', 'The Nancy Grace Roman Space Telescope will settle essential questions in the areas of dark energy, exoplanets, and astrophysics.', 'science.nasa.gov', True] ] diff --git a/tests/test_security.py b/tests/test_security.py new file mode 100644 index 0000000..65d5e9a --- /dev/null +++ b/tests/test_security.py @@ -0,0 +1,60 @@ +import subprocess +from parameterized import parameterized + +BASE_URL = 'http://localhost:8080' + + +def curl_status(url): + """Get HTTP status code using curl to avoid URL normalization by Python libs.""" + result = subprocess.run( + ['curl', '-s', '-o', '/dev/null', '-w', '%{http_code}', url], + capture_output=True, text=True, timeout=10 + ) + return int(result.stdout) + + +class TestMalformedPaths: + """Test that malformed paths don't crash the server. + + URLs like //foo are parsed as having 'foo' as the authority (host), + resulting in an empty path. Empty paths previously crashed jester's + static file handler. Now they return 400. + + URLs like //foo/bar are parsed as authority='foo', path='/bar', + so they route normally (not empty path). + """ + + @parameterized.expand([ + # These parse to empty paths -> 400 + ('//lefty_rae', 400), + ('//test', 400), + ('//anyuser', 400), + ]) + def test_empty_path_returns_400(self, path, expected_status): + """URLs that parse to empty paths should return 400, not crash.""" + status = curl_status(f'{BASE_URL}{path}') + assert status == expected_status, \ + f'Expected {expected_status} for {path}, got {status}' + + @parameterized.expand([ + ('/jack', 200), + ('/about', 200), + ('/', 200), + ]) + def test_normal_paths_work(self, path, expected_status): + """Normal paths should still work.""" + status = curl_status(f'{BASE_URL}{path}') + assert status == expected_status, \ + f'Expected {expected_status} for {path}, got {status}' + + def test_server_survives_malformed_requests(self): + """Server should handle malformed requests without crashing.""" + # These all parse to empty paths + malformed_paths = ['//a', '//b', '//c', '//user', '//test'] + for path in malformed_paths: + status = curl_status(f'{BASE_URL}{path}') + assert status == 400, f'Expected 400 for {path}, got {status}' + + # Verify server is still responding after malformed requests + status = curl_status(f'{BASE_URL}/') + assert status == 200, 'Server should still be alive'