init snippets
This commit is contained in:
commit
cc9255ae51
5 changed files with 137 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@@ -0,0 +1,2 @@
|
||||||
|
.scala-build/
|
||||||
|
.bsp/
|
97
misc/scrape.scala
Normal file
97
misc/scrape.scala
Normal file
|
@@ -0,0 +1,97 @@
|
||||||
|
//> using dep org.http4s::http4s-ember-client:1.0.0-M43
|
||||||
|
//> using dep org.typelevel::log4cats-slf4j:2.7.0
|
||||||
|
import cats.effect.*
|
||||||
|
import cats.effect.std.*
|
||||||
|
import cats.syntax.all.*
|
||||||
|
import org.http4s.ember.client.EmberClientBuilder
|
||||||
|
import org.http4s.client.Client
|
||||||
|
import org.http4s.client.middleware.FollowRedirect
|
||||||
|
import org.http4s.EntityDecoder
|
||||||
|
import fs2.*
|
||||||
|
import fs2.io.file.*
|
||||||
|
import org.typelevel.log4cats.*
|
||||||
|
import org.typelevel.log4cats.slf4j.Slf4jFactory
|
||||||
|
|
||||||
|
/** Crawls http://help.drip.im/hc (following in-site /hc/ links up to 2 levels
  * deep), extracts `img src="..."` URLs from each page, and downloads every
  * image under /Users/jilen/Downloads/drip_help_img, mirroring the URL path
  * on disk. Pages and downloads that fail are logged and skipped.
  */
object Scrape extends IOApp.Simple {

  given LoggerFactory[IO] = Slf4jFactory.create[IO]

  // Bug fix: IOApp.Simple#run must be IO[Unit]; the original
  // `.as(ExitCode.Success)` produced IO[ExitCode] and did not type-check.
  def run = saveImages().compile.drain

  /** Builds the full pipeline: crawl for image URLs, then download each one.
    * A failed download is printed and skipped so one bad URL cannot abort the
    * whole crawl.
    */
  private def saveImages(): Stream[IO, Unit] = {
    for {
      client     <- Stream.resource(EmberClientBuilder.default[IO].build)
      savedRef   <- Stream.eval(Ref.of[IO, Set[String]](Set.empty))
      scannedRef <- Stream.eval(Ref.of[IO, Set[String]](Set.empty))
      imageUrl   <- loopScrape(scannedRef, client, "http://help.drip.im/hc", 0)
      _ <- Stream.eval(
             downloadImage(savedRef, client, imageUrl)
               .handleErrorWith(e => IO.println(s"Error download ${imageUrl}, ${e}"))
           )
    } yield ()
  }

  /** Strips a trailing "/?query" segment from a relative path, so query
    * strings never end up in the on-disk file name (e.g. "a/b/?x=1" -> "a/b").
    */
  def normalizeFilePath(p: String) = {
    val regex = "/\\?[^/]*$"
    p.replaceAll(regex, "")
  }

  /** Downloads `url` to disk unless it is already recorded in `savedUrls`.
    * The local file path mirrors the URL path under the target directory;
    * parent directories are created on demand. Follows up to 3 redirects.
    */
  private def downloadImage(savedUrls: Ref[IO, Set[String]], client: Client[IO], url: String): IO[Unit] = {
    val FpRegex = "https?\\://[^/]+/(.*)$".r
    val fClient = FollowRedirect(maxRedirects = 3)(client)
    url match {
      case FpRegex(p) =>
        val fp = Path(s"/Users/jilen/Downloads/drip_help_img/${normalizeFilePath(p)}")
        for {
          saved <- savedUrls.get.map(_.contains(url))
          _ <- if (saved) IO.println(s":::${url} Already saved")
               else IO.println(s":::${url} saved to ${fp}")
          _ <- fp.parent.map(Files[IO].createDirectories).getOrElse(IO.unit).whenA(!saved)
          _ <- fClient.expect(url)(using EntityDecoder.binFile[IO](fp)).whenA(!saved)
          // Only mark the URL saved after the download succeeded, so a failed
          // attempt can be retried on a later occurrence.
          _ <- savedUrls.update(_ + url).whenA(!saved)
        } yield ()
      case _ =>
        // Bug fix: the original match was non-exhaustive — a relative or
        // otherwise malformed URL raised a MatchError and killed the stream.
        IO.println(s":::${url} skipped (not an absolute http(s) url)")
    }
  }

  /** Recursively walks pages starting at `rootUrl`, emitting every image URL
    * found. `scannedRef` prevents revisiting a page; recursion stops once
    * `depth` reaches 2. A page that fails to fetch is treated as empty.
    */
  private def loopScrape(
      scannedRef: Ref[IO, Set[String]],
      client: Client[IO],
      rootUrl: String,
      depth: Int
  ): Stream[IO, String] = {
    Stream.eval(scannedRef.get).flatMap { scanned =>
      if (scanned.contains(rootUrl)) {
        Stream.empty
      } else {
        Stream.eval(scannedRef.update(_ + rootUrl)) >>
          Stream.eval(client.expect[String](rootUrl).handleError(_ => "")).flatMap { page =>
            val images = extractImages(page)
            val remain =
              if (depth >= 2) Stream.empty
              else Stream.emits(extractLinks(page)).flatMap(l => loopScrape(scannedRef, client, l, depth + 1))
            Stream.emits(images) ++ remain
          }
      }
    }
  }

  /** Pulls `img src="..."` values out of raw HTML, dropping assets hosted on
    * the two CDN domains we do not want to mirror.
    */
  private def extractImages(page: String): Seq[String] = {
    val regex = "img\\s*src\\s*=\\s*\"(\\S+)\"".r
    regex
      .findAllMatchIn(page)
      .map(_.group(1))
      .toSeq
      .filterNot(s => s.contains("wx-static.drip.im") || s.contains("qiniu.drip.im"))
  }

  /** Pulls in-site /hc/... links out of raw HTML, making relative links
    * absolute against http://help.drip.im and keeping only same-site URLs.
    */
  private def extractLinks(page: String): Seq[String] = {
    val regex = "href[^\\>]*(?:help\\.drip\\.im)?(/hc/[\\w/]+)".r
    regex
      .findAllMatchIn(page)
      .map { m =>
        val link = m.group(1)
        if (link.startsWith("http")) link else s"http://help.drip.im$link"
      }
      .toSeq
      .filter(_.contains("help.drip.im"))
  }
}
|
21
std/macro.scala
Normal file
21
std/macro.scala
Normal file
|
@@ -0,0 +1,21 @@
|
||||||
|
//> using scala 3.4.0
|
||||||
|
|
||||||
|
import scala.quoted.*
|
||||||
|
|
||||||
|
/** Prints a description of the Tasty tree of the inlined argument at macro
  * expansion (compile) time; the expression itself evaluates to (). */
inline def printTree[A](inline x: A) = ${printTreeImpl[A]('x)}
|
||||||
|
|
||||||
|
/** Macro implementation for [[printTree]]: unwraps compiler-inserted Inlined
  * nodes, then prints either the body of a lambda expression or the whole
  * tree. The generated expression is just ().
  */
private def printTreeImpl[A](x: Expr[A])(using Quotes) = {
  import quotes.reflect.*

  // Recurse through Inlined wrappers so we report the user-written tree.
  def print0(t: Term): Unit = t match {
    case Inlined(_, _, inner) =>
      print0(inner)
    case Lambda(_, body) =>
      println(s"Lambda body ${body}")
    case other =>
      // Bug fix: the original copy-pasted "Lambda body" into this branch,
      // mislabelling non-lambda trees.
      println(s"Tree ${other}")
  }

  print0(x.asTerm)
  '{()}
}
|
3
std/newInstance.scala
Normal file
3
std/newInstance.scala
Normal file
|
@@ -0,0 +1,3 @@
|
||||||
|
//> using scala 3.3.4
|
||||||
|
|
||||||
|
|
14
zd/gen_user_id.scala
Normal file
14
zd/gen_user_id.scala
Normal file
|
@@ -0,0 +1,14 @@
|
||||||
|
//> using dep com.google.guava:guava:33.3.0-jre
|
||||||
|
|
||||||
|
import com.google.common.base.Charsets
|
||||||
|
import com.google.common.hash.Hashing
|
||||||
|
|
||||||
|
/** Entry point: prints "<id>|<sign>" where the signature is
  * sha256(id + sha256(key)), suitable for piping into other tooling.
  */
@main
def genId(id: String, key: String) = {
  val keyDigest = sha256(key)
  val signature = sha256(id + keyDigest)
  println(s"$id|$signature")
}
|
||||||
|
|
||||||
|
/** Hex-encoded (lowercase) SHA-256 digest of the UTF-8 bytes of `str`.
  *
  * Rewritten on the JDK's MessageDigest + StandardCharsets instead of Guava's
  * Hashing/Charsets (Charsets is deprecated in Guava), producing the exact
  * same lowercase-hex output with no third-party dependency.
  */
def sha256(str: String) = {
  import java.security.MessageDigest
  import java.nio.charset.StandardCharsets
  val digest = MessageDigest.getInstance("SHA-256").digest(str.getBytes(StandardCharsets.UTF_8))
  digest.map(b => f"$b%02x").mkString
}
|
Loading…
Reference in a new issue