init snippets

This commit is contained in:
jilen 2024-11-02 20:34:57 +08:00
commit cc9255ae51
5 changed files with 137 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
.scala-build/
.bsp/

97
misc/scrape.scala Normal file
View file

@ -0,0 +1,97 @@
//> using dep org.http4s::http4s-ember-client:1.0.0-M43
//> using dep org.typelevel::log4cats-slf4j:2.7.0
import cats.effect.*
import cats.effect.std.*
import cats.syntax.all.*
import org.http4s.ember.client.EmberClientBuilder
import org.http4s.client.Client
import org.http4s.client.middleware.FollowRedirect
import org.http4s.EntityDecoder
import fs2.*
import fs2.io.file.*
import org.typelevel.log4cats.*
import org.typelevel.log4cats.slf4j.Slf4jFactory
/** Crawls pages under `help.drip.im/hc` and saves the images they reference to local disk.
  *
  * Pages are fetched starting at [[RootUrl]]; `/hc/...` links are followed until
  * [[MaxDepth]] is reached, and every image URL found on the way is downloaded once.
  */
object Scrape extends IOApp.Simple {

  /** Page the crawl starts from. */
  private val RootUrl = "http://help.drip.im/hc"

  /** Directory the images are written under; the URL path is mirrored below it. */
  private val OutputDir = "/Users/jilen/Downloads/drip_help_img"

  /** Links found on a page at this depth (the root is depth 0) are not followed. */
  private val MaxDepth = 2

  /** Matches an absolute http(s) URL and captures everything after the host. */
  private val FilePathRegex = "https?\\://[^/]+/(.*)$".r

  given LoggerFactory[IO] = Slf4jFactory.create[IO]

  /** Runs the whole crawl; `IOApp.Simple` already maps normal completion to a success exit. */
  def run: IO[Unit] = saveImages().compile.drain

  /** Builds the crawl: one HTTP client, one set of saved image URLs, one set of visited pages.
    *
    * A failing download is reported on stdout and does not stop the crawl.
    */
  private def saveImages(): Stream[IO, Unit] =
    for {
      client <- Stream.resource(EmberClientBuilder.default[IO].build)
      savedRef <- Stream.eval(Ref.of[IO, Set[String]](Set.empty))
      scanedRef <- Stream.eval(Ref.of[IO, Set[String]](Set.empty))
      _ <- loopScrape(scanedRef, client, RootUrl, 0).evalMap { img =>
        downloadImage(savedRef, client, img)
          .handleErrorWith(e => IO.println(s"Error download ${img}, ${e}"))
      }
    } yield ()

  /** Removes a trailing `/?...` segment (a slash, a question mark and anything up to the
    * end that contains no further slash) from a URL path.
    */
  def normalizeFilePath(p: String): String =
    p.replaceAll("/\\?[^/]*$", "")

  /** Downloads `url` into [[OutputDir]] unless it is already recorded in `savedUrls`.
    *
    * @param savedUrls URLs that have been downloaded so far; updated after a successful download
    * @param client    HTTP client; wrapped so that up to 3 redirects are followed
    * @param url       image URL; must be an absolute http(s) URL
    * @return an effect that fails (inside `IO`) when `url` is not an absolute http(s) URL
    *         or when the download itself fails
    */
  private def downloadImage(savedUrls: Ref[IO, Set[String]], client: Client[IO], url: String): IO[Unit] =
    url match {
      case FilePathRegex(p) =>
        val target = Path(s"$OutputDir/${normalizeFilePath(p)}")
        val redirecting = FollowRedirect(maxRedirects = 3)(client)
        savedUrls.get.map(_.contains(url)).flatMap {
          case true =>
            IO.println(s":::${url} Already saved")
          case false =>
            for {
              _ <- IO.println(s":::${url} saved to ${target}")
              _ <- target.parent.fold(IO.unit)(Files[IO].createDirectories(_))
              _ <- redirecting.expect(url)(using EntityDecoder.binFile[IO](target))
              _ <- savedUrls.update(_ + url)
            } yield ()
        }
      case _ =>
        // Relative or otherwise unexpected src values must fail inside IO so that the
        // caller's error handler can report them instead of the whole stream crashing.
        IO.raiseError(new IllegalArgumentException(s"Not an absolute http(s) URL: ${url}"))
    }

  /** Fetches a page body as text; a failed request is reported and treated as an empty page. */
  private def fetchPage(client: Client[IO], url: String): IO[String] =
    client
      .expect[String](url)
      .handleErrorWith(e => IO.println(s"Error fetch ${url}, ${e}").as(""))

  /** Emits the image URLs found on `rootUrl` and, recursively, on the pages it links to.
    *
    * @param scanedRef page URLs already visited; a page is fetched at most once
    * @param client    HTTP client used to fetch pages
    * @param rootUrl   page to scan
    * @param depth     depth of `rootUrl` in the crawl; links are followed only while it is
    *                  below [[MaxDepth]]
    */
  private def loopScrape(
      scanedRef: Ref[IO, Set[String]],
      client: Client[IO],
      rootUrl: String,
      depth: Int
  ): Stream[IO, String] = {
    // Check and mark in one step so a page can never be claimed twice.
    val claim: IO[Boolean] =
      scanedRef.modify(seen => (seen + rootUrl, !seen.contains(rootUrl)))

    Stream.eval(claim).flatMap { firstVisit =>
      if (!firstVisit) {
        Stream.empty
      } else {
        Stream.eval(fetchPage(client, rootUrl)).flatMap { page =>
          val nested: Stream[IO, String] =
            if (depth >= MaxDepth) Stream.empty
            else Stream.emits(extractLinks(page)).flatMap(l => loopScrape(scanedRef, client, l, depth + 1))
          Stream.emits(extractImages(page)) ++ nested
        }
      }
    }
  }

  /** Returns the `src` values of `img` tags in `page`, skipping those that contain
    * `wx-static.drip.im` or `qiniu.drip.im`.
    */
  private def extractImages(page: String): Seq[String] = {
    // Stop at the closing quote even when the next attribute follows without a space.
    val regex = "img\\s*src\\s*=\\s*\"([^\"\\s]+)\"".r
    regex
      .findAllMatchIn(page)
      .map(_.group(1))
      .filterNot(s => s.contains("wx-static.drip.im") || s.contains("qiniu.drip.im"))
      .toSeq
  }

  /** Returns absolute `http://help.drip.im/hc/...` URLs for the `/hc/...` links in `page`. */
  private def extractLinks(page: String): Seq[String] = {
    val regex = "href[^\\>]*(?:help\\.drip\\.im)?(/hc/[\\w/]+)".r
    // The captured group always starts with "/hc/", so it is always host-relative.
    regex.findAllMatchIn(page).map(m => s"http://help.drip.im${m.group(1)}").toSeq
  }
}

21
std/macro.scala Normal file
View file

@ -0,0 +1,21 @@
//> using scala 3.4.0
import scala.quoted.*
/** Prints the compiler's tree for the expression `x` while the call site is being
  * compiled; the call itself expands to `()`.
  */
inline def printTree[A](inline x: A): Unit =
  ${ printTreeImpl[A]('x) }
/** Macro implementation of `printTree`.
  *
  * Unwraps any `Inlined` nodes around `x`, then prints (during compilation) either the
  * body of a lambda or, for any other term, the term itself.
  *
  * @param x the quoted expression to inspect
  * @return an expression that evaluates to `()`
  */
private def printTreeImpl[A](x: Expr[A])(using Quotes): Expr[Unit] = {
  import quotes.reflect.*

  def print0(t: Term): Unit =
    t match {
      case Inlined(_, _, inner) =>
        // Inline arguments arrive wrapped; look through the wrapper.
        print0(inner)
      case Lambda(_, body) =>
        println(s"Lambda body ${body}")
      case other =>
        // Not a lambda, so label it as a plain term.
        println(s"Term ${other}")
    }

  print0(x.asTerm)
  '{ () }
}

3
std/newInstance.scala Normal file
View file

@ -0,0 +1,3 @@
//> using scala 3.3.4

14
zd/gen_user_id.scala Normal file
View file

@ -0,0 +1,14 @@
//> using dep com.google.guava:guava:33.3.0-jre
import com.google.common.base.Charsets
import com.google.common.hash.Hashing
/** Command-line entry point: prints `id`, a `|`, and a signature computed as
  * `sha256(id + sha256(key))`.
  *
  * NOTE(review): this is a plain nested hash, not an HMAC; confirm that is what the
  * consumer of the signature expects.
  *
  * @param id  identifier to sign
  * @param key secret mixed into the signature
  */
@main
def genId(id: String, key: String): Unit = {
  val keyDigest = sha256(key)
  val sign = sha256(id + keyDigest)
  println(s"${id}|${sign}")
}
/** Returns the SHA-256 digest of the UTF-8 bytes of `str`, rendered via the
  * hash code's `toString`.
  */
def sha256(str: String): String =
  // The JDK constant replaces Guava's legacy `Charsets.UTF_8`; both are the same Charset.
  Hashing.sha256().hashString(str, java.nio.charset.StandardCharsets.UTF_8).toString