CREATE TABLE IF NOT EXISTS hits (
....
)
DUPLICATE KEY (CounterID, EventDate, UserID, EventTime, WatchID)
DISTRIBUTED BY HASH(UserID) BUCKETS 48
PROPERTIES (
"replication_num"="1",
"compression"="ZSTD");
CREATE TABLE IF NOT EXISTS hits (
....
) DUPLICATE KEY (CounterID, EventDate, UserID, EventTime, WatchID)
DISTRIBUTED BY HASH(UserID) BUCKETS 4
PROPERTIES (
"replication_num"="1",
"compression"="ZSTD",
"storage_page_size"="1048576");
SELECT
COLUMN_NAME, COLUMN_TYPE,
sum(COMPRESSED_DATA_BYTES) AS compressed_bytes,
sum(UNCOMPRESSED_DATA_BYTES) AS uncompressed_bytes,
sum(RAW_DATA_BYTES) AS raw_data_bytes,
round(sum(COMPRESSED_DATA_BYTES) * 100.0 / sum(UNCOMPRESSED_DATA_BYTES), 2) as ratio
FROM information_schema.column_data_sizes
WHERE table_id = 1761704935641
GROUP BY COLUMN_NAME, COLUMN_TYPE
ORDER BY sum(COMPRESSED_DATA_BYTES) DESC;
+-----------------------+-------------+------------------+--------------------+----------------+--------+
| COLUMN_NAME | COLUMN_TYPE | compressed_bytes | uncompressed_bytes | raw_data_bytes | ratio |
+-----------------------+-------------+------------------+--------------------+----------------+--------+
| URL | STRING | 1747139004 | 9404393858 | 9038895826 | 18.58 |
| Referer | STRING | 1552943801 | 7023847152 | 6662498316 | 22.11 |
| Title | STRING | 1480554020 | 9838412581 | 9488276782 | 15.05 |
| OriginalURL | STRING | 810663093 | 5680006400 | 5317485214 | 14.27 |
| WatchID | BIGINT | 781560948 | 781560948 | 799979976 | 100.00 |
| URLHash | BIGINT | 760852247 | 766458338 | 799979976 | 99.27 |
| RefererHash | BIGINT | 743785927 | 747950617 | 799979976 | 99.44 |
| FUniqID | BIGINT | 389556325 | 512285220 | 799979976 | 76.04 |
| UserID | BIGINT | 379008085 | 495166618 | 799979976 | 76.54 |
| HID | INT | 371392630 | 371392630 | 399989988 | 100.00 |
+-----------------------+-------------+------------------+--------------------+----------------+--------+
+-----------------------+-------------+------------------+--------------------+----------------+--------+
| COLUMN_NAME | COLUMN_TYPE | compressed_bytes | uncompressed_bytes | raw_data_bytes | ratio |
+-----------------------+-------------+------------------+--------------------+----------------+--------+
| URL | STRING | 1455177520 | 9057197818 | 9038895826 | 16.07 |
| Referer | STRING | 1331679271 | 6730874117 | 6662498316 | 19.78 |
| Title | STRING | 1122300920 | 9505664009 | 9488276782 | 11.81 |
| WatchID | BIGINT | 800004249 | 800004249 | 799979976 | 100.00 |
| OriginalURL | STRING | 768372911 | 5402777190 | 5317485214 | 14.22 |
+-----------------------+-------------+------------------+--------------------+----------------+--------+
+-----------------------+-------------+------------------+--------------------+----------------+--------+
| COLUMN_NAME | COLUMN_TYPE | compressed_bytes | uncompressed_bytes | raw_data_bytes | ratio |
+-----------------------+-------------+------------------+--------------------+----------------+--------+
| WatchID | BIGINT | 800004249 | 800004249 | 799979976 | 100.00 |
| URLHash | BIGINT | 348739226 | 800004249 | 799979976 | 43.59 |
| RefererHash | BIGINT | 295833828 | 800004249 | 799979976 | 36.98 |
| FUniqID | BIGINT | 169720968 | 800004249 | 799979976 | 21.22 |
| UserID | BIGINT | 169536965 | 800004249 | 799979976 | 21.19 |
+-----------------------+-------------+------------------+--------------------+----------------+--------+
SELECT
COLUMN_NAME,COLUMN_TYPE,
sum(COMPRESSED_DATA_BYTES) AS compressed_bytes,
sum(UNCOMPRESSED_DATA_BYTES) as uncompressed_bytes,
sum(RAW_DATA_BYTES) as raw_data_bytes,
round(sum(COMPRESSED_DATA_BYTES) * 100.0 / sum(UNCOMPRESSED_DATA_BYTES), 2) as ratio
FROM information_schema.column_data_sizes
WHERE table_id=1761728747278
GROUP BY COLUMN_NAME, COLUMN_TYPE
ORDER BY sum(COMPRESSED_DATA_BYTES) desc limit 1;
+-----------------------+-------------+------------------+--------------------+----------------+--------+
| COLUMN_NAME | COLUMN_TYPE | compressed_bytes | uncompressed_bytes | raw_data_bytes | ratio |
+-----------------------+-------------+------------------+--------------------+----------------+--------+
| URL | STRING | 1456833417 | 9144581584 | 9038895826 | 15.93 |
+-----------------------+-------------+------------------+--------------------+----------------+--------+
create table t1(
`URL` varchar(8000) NOT NULL
) DUPLICATE KEY (URL)
DISTRIBUTED BY HASH(URL) BUCKETS 1
PROPERTIES ( "replication_num"="1", "storage_page_size"="1048576");
insert into t1 select URL from hits;
+-------------+-------------+------------------+--------------------+----------------+-------+
| COLUMN_NAME | COLUMN_TYPE | compressed_bytes | uncompressed_bytes | raw_data_bytes | ratio |
+-------------+-------------+------------------+--------------------+----------------+-------+
| URL | VARCHAR | 773983002 | 9148474115 | 9038895826 | 8.46 |
+-------------+-------------+------------------+--------------------+----------------+-------+
- END -
更多标杆企业信赖
智慧金融与政企:东北证券|国金证券|国信证券|杭银消金|河北幸福消费金融|汇添富基金|金融壹账通|陆金所控股|霖梓控股|拉卡拉|平安人寿 | Planet| 奇富科技|上海证券 | 同程数科|通联支付|无锡锡商银行|星云零售信贷|星火保 | 宇信科技|银联商务|易生支付|招商信诺人寿|招联金融|中信银行信用卡中心|360 数科|360 企业安全浏览器
互联网与文娱:菜鸟|抖音集团|斗鱼|叮咚买菜|浩瀚深度|京东|工商信息查询平台|货拉拉|快手|荔枝微课|票务平台|墨迹天气|MiniMax|奇安信|趣丸科技|顺丰科技|腾讯音乐|天眼查|网易|网易游戏|网易严选|网易云信|网易云音乐|小米|小鹅通|迅雷|约苗|字节跳动|知乎|360 商业化
企业服务与新经济:宝尊科技| 波司登|Cisco|橙联|度言|观测云|慧策|快成物流|领健|领创|灵犀科技|名创优品|Moka BI|美联物业|钱大妈|拈花云科|森马 |思必驰|顺丰科技|上海家化 | 物易云通|云积互动|有赞|雨润集团|纵腾集团|中通快递
先进智造与电信:爱玛|长安汽车|极越汽车|金风科技|科大讯飞|岚图汽车|Lifewit|哪吒科技|四川航空|上海通用五菱|三星电子|蜀海供应链|特步|天翼云|雅迪|中国联通
作为基于 Apache Doris 的商业化公司,飞轮科技秉承着 “开源技术创新”和“实时数仓服务”双轮驱动的战略,在投入资源大力参与 Apache Doris 社区研发和推广的同时,基于 Apache Doris 内核打造了聚焦于企业大数据实时分析需求的企业级产品 SelectDB ,面向新一代需求打造世界领先的实时分析能力。自 2022 年成立以来,获得 IDG 资本、红杉中国、襄禾资本等顶级 VC 的近 10 亿元融资,创下了近年来开源基础软件领域的新纪录。

