Package crawlercommons.urlfrontier
Interface URLFrontierGrpc.AsyncService
- All Known Implementing Classes:
URLFrontierGrpc.URLFrontierImplBase
- Enclosing class:
- URLFrontierGrpc
public static interface URLFrontierGrpc.AsyncService
-
Method Summary
Modifier and Type | Method | Description
default void | blockQueueUntil(Urlfrontier.BlockQueueParams request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) | Block a queue from sending URLs; the argument is the number of seconds of UTC time since Unix epoch 1970-01-01T00:00:00Z.
default void | deleteCrawl(Urlfrontier.DeleteCrawlMessage request, io.grpc.stub.StreamObserver<Urlfrontier.Long> responseObserver) | Delete an entire crawl, returns the number of URLs removed this way
default void | deleteQueue(Urlfrontier.QueueWithinCrawlParams request, io.grpc.stub.StreamObserver<Urlfrontier.Long> responseObserver) | Delete the queue based on the key in parameter, returns the number of URLs removed this way
default void | getActive(Urlfrontier.Local request, io.grpc.stub.StreamObserver<Urlfrontier.Boolean> responseObserver) | Returns true if the crawl is active, false if it has been deactivated with SetActive(Boolean)
default void | getStats(Urlfrontier.QueueWithinCrawlParams request, io.grpc.stub.StreamObserver<Urlfrontier.Stats> responseObserver) | Return stats for a specific queue or an entire crawl.
default void | getURLs(Urlfrontier.GetParams request, io.grpc.stub.StreamObserver<Urlfrontier.URLInfo> responseObserver) | Stream URLs due for fetching from M queues with up to N items per queue
default void | getURLStatus(Urlfrontier.URLStatusRequest request, io.grpc.stub.StreamObserver<Urlfrontier.URLItem> responseObserver) | Get status of a particular URL. This does not take into account URL scheduling.
default void | listCrawls(Urlfrontier.Local request, io.grpc.stub.StreamObserver<Urlfrontier.StringList> responseObserver) | Return the list of crawls handled by the frontier(s)
default void | listNodes(Urlfrontier.Empty request, io.grpc.stub.StreamObserver<Urlfrontier.StringList> responseObserver) | Return the list of nodes forming the cluster the current node belongs to
default void | listQueues(Urlfrontier.Pagination request, io.grpc.stub.StreamObserver<Urlfrontier.QueueList> responseObserver) | Return a list of queues for a specific crawl.
default void | listURLs(Urlfrontier.ListUrlParams request, io.grpc.stub.StreamObserver<Urlfrontier.URLItem> responseObserver) | List all URLs currently in the frontier. This does not take into account URL scheduling.
default io.grpc.stub.StreamObserver<Urlfrontier.URLItem> | putURLs(io.grpc.stub.StreamObserver<Urlfrontier.AckMessage> responseObserver) | Push URL items to the server; they get created (if they don't already exist) in case of DiscoveredURLItems or updated if KnownURLItems
default void | setActive(Urlfrontier.Active request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) | De/activate the crawl.
default void | setCrawlLimit(Urlfrontier.CrawlLimitParams request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) | Sets crawl limit for domain
default void | setDelay(Urlfrontier.QueueDelayParams request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) | Set a delay for a given queue.
default void | setLogLevel(Urlfrontier.LogLevelParams request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) | Overrides the log level for a given package
-
Method Details
-
listNodes
default void listNodes(Urlfrontier.Empty request, io.grpc.stub.StreamObserver<Urlfrontier.StringList> responseObserver) * Return the list of nodes forming the cluster the current node belongs to *
-
listCrawls
default void listCrawls(Urlfrontier.Local request, io.grpc.stub.StreamObserver<Urlfrontier.StringList> responseObserver) * Return the list of crawls handled by the frontier(s) *
-
deleteCrawl
default void deleteCrawl(Urlfrontier.DeleteCrawlMessage request, io.grpc.stub.StreamObserver<Urlfrontier.Long> responseObserver) * Delete an entire crawl, returns the number of URLs removed this way *
-
listQueues
default void listQueues(Urlfrontier.Pagination request, io.grpc.stub.StreamObserver<Urlfrontier.QueueList> responseObserver) * Return a list of queues for a specific crawl. Can choose whether to include inactive queues (a queue is active if it has URLs due for fetching); by default the service will return up to 100 results from offset 0 and exclude inactive queues. *
-
getURLs
default void getURLs(Urlfrontier.GetParams request, io.grpc.stub.StreamObserver<Urlfrontier.URLInfo> responseObserver) * Stream URLs due for fetching from M queues with up to N items per queue *
-
putURLs
default io.grpc.stub.StreamObserver<Urlfrontier.URLItem> putURLs(io.grpc.stub.StreamObserver<Urlfrontier.AckMessage> responseObserver) * Push URL items to the server; they get created (if they don't already exist) in case of DiscoveredURLItems or updated if KnownURLItems *
-
getStats
default void getStats(Urlfrontier.QueueWithinCrawlParams request, io.grpc.stub.StreamObserver<Urlfrontier.Stats> responseObserver) * Return stats for a specific queue or an entire crawl. Does not aggregate the stats across different crawlids. *
-
deleteQueue
default void deleteQueue(Urlfrontier.QueueWithinCrawlParams request, io.grpc.stub.StreamObserver<Urlfrontier.Long> responseObserver) * Delete the queue based on the key in parameter, returns the number of URLs removed this way *
-
blockQueueUntil
default void blockQueueUntil(Urlfrontier.BlockQueueParams request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) * Block a queue from sending URLs; the argument is the number of seconds of UTC time since Unix epoch 1970-01-01T00:00:00Z. The default value of 0 will unblock the queue. The block will get removed once the time indicated in argument is reached. This is useful for cases where a server returns a Retry-After for instance.
-
setActive
default void setActive(Urlfrontier.Active request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) * De/activate the crawl. GetURLs will not return anything until SetActive is set to true. PutURLs will still take incoming data. *
-
getActive
default void getActive(Urlfrontier.Local request, io.grpc.stub.StreamObserver<Urlfrontier.Boolean> responseObserver) * Returns true if the crawl is active, false if it has been deactivated with SetActive(Boolean) *
-
setDelay
default void setDelay(Urlfrontier.QueueDelayParams request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) * Set a delay for a given queue. No URLs will be obtained via GetURLs for this queue until the number of seconds specified has elapsed since the last time URLs were retrieved. Usually informed by the delay setting of robots.txt.
-
setLogLevel
default void setLogLevel(Urlfrontier.LogLevelParams request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) * Overrides the log level for a given package *
-
setCrawlLimit
default void setCrawlLimit(Urlfrontier.CrawlLimitParams request, io.grpc.stub.StreamObserver<Urlfrontier.Empty> responseObserver) * Sets crawl limit for domain *
-
getURLStatus
default void getURLStatus(Urlfrontier.URLStatusRequest request, io.grpc.stub.StreamObserver<Urlfrontier.URLItem> responseObserver) * Get status of a particular URL. This does not take into account URL scheduling. Used to check the current status of a URL within the frontier.
-
listURLs
default void listURLs(Urlfrontier.ListUrlParams request, io.grpc.stub.StreamObserver<Urlfrontier.URLItem> responseObserver) * List all URLs currently in the frontier. This does not take into account URL scheduling. Used to check the current status of all URLs within the frontier.
-