post by yamamoto | 0 | 180 views

MongoDB で大量データ(390万件弱)をインポートしてみた

DPC外来のEFファイル(32列、2.1GB、3,883,160件)をMongoDBにインポートしてみました。進捗の表示はずっと0.0Bのままなので、インポートできていないのかと思いましたが、ちゃんとインポートされていて、所要時間はおよそ2分でした。しかもデータベース上の容量は 0.461GBで収まったのですね。

db.EFn.find({"EF-0":201504}) で検索してみてもすぐレスポンスがあって、全然ストレスを感じませんでした。複雑な mapReduce はしていませんが、これからいろいろ分析してみます。

【以下オペレーション記録】
mongoimport --file DPC_EFn_201504-12_title.txt --headerline --type tsv -d dpc -c EFn --drop --ignoreBlanks

2016-03-24T12:16:06.388+0900 connected to: localhost

2016-03-24T12:16:06.389+0900 dropping: dpc.EFn

2016-03-24T12:16:09.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:12.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:15.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:18.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:21.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:24.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:27.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:30.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:33.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:36.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:39.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:42.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:45.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:48.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:51.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:54.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:16:57.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:00.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:03.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:06.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:09.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:12.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:15.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:18.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:21.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:24.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:27.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:30.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:33.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:36.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:39.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:42.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:45.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:48.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:51.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:54.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:17:57.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:18:00.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:18:03.377+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:18:06.378+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:18:07.265+0900 [……………………] dpc.EFn 0.0 B/2.1 GB (0.0%)

2016-03-24T12:18:07.265+0900 imported 3883160 documents

Alderamin:working yamamoto$ mongo

MongoDB shell version: 3.2.4

connecting to: test

> show dbs

dpc    0.461GB

local  0.000GB

> use dpc

switched to db dpc

> show collections

EFn

> db.stats()

{

"db" : "dpc",

"collections" : 1,

"objects" : 3883160,

"avgObjSize" : 769.4172084075856,

"dataSize" : 2987770127,

"storageSize" : 459800576,

"numExtents" : 0,

"indexes" : 1,

"indexSize" : 35176448,

"ok" : 1

}

> db.EFn.stats()

{

"ns" : "dpc.EFn",

"count" : 3883160,

"size" : 2987770127,

"avgObjSize" : 769,

"storageSize" : 459800576,

"capped" : false,

"wiredTiger" : {

"metadata" : {

"formatVersion" : 1

},

"creationString" : "allocation_size=4KB,app_metadata=(formatVersion=1),block_allocation=best,block_compressor=snappy,cache_resident=0,checksum=on,colgroups=,collator=,columns=,dictionary=0,encryption=(keyid=,name=),exclusive=0,extractor=,format=btree,huffman_key=,huffman_value=,immutable=0,internal_item_max=0,internal_key_max=0,internal_key_truncate=,internal_page_max=4KB,key_format=q,key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=64MB,log=(enabled=),lsm=(auto_throttle=,bloom=,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,bloom_oldest=0,chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=10m,os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,prefix_compression_min=4,source=,split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,type=file,value_format=u",

"type" : "file",

"uri" : "statistics:table:collection-2-5367646882288087581",

"LSM" : {

"bloom filters in the LSM tree" : 0,

"bloom filter false positives" : 0,

"bloom filter hits" : 0,

"bloom filter misses" : 0,

"bloom filter pages evicted from cache" : 0,

"bloom filter pages read into cache" : 0,

"total size of bloom filters" : 0,

"sleep for LSM checkpoint throttle" : 0,

"chunks in the LSM tree" : 0,

"highest merge generation in the LSM tree" : 0,

"queries that could have benefited from a Bloom filter that did not exist" : 0,

"sleep for LSM merge throttle" : 0

},

"block-manager" : {

"file allocation unit size" : 4096,

"blocks allocated" : 107910,

"checkpoint size" : 459759616,

"allocations requiring file extension" : 107902,

"blocks freed" : 12,

"file magic number" : 120897,

"file major version number" : 1,

"minor version number" : 0,

"file bytes available for reuse" : 36864,

"file size in bytes" : 459800576

},

"btree" : {

"btree checkpoint generation" : 133,

"column-store variable-size deleted values" : 0,

"column-store fixed-size leaf pages" : 0,

"column-store internal pages" : 0,

"column-store variable-size RLE encoded values" : 0,

"column-store variable-size leaf pages" : 0,

"pages rewritten by compaction" : 0,

"number of key/value pairs" : 0,

"fixed-record size" : 0,

"maximum tree depth" : 3,

"maximum internal page key size" : 368,

"maximum internal page size" : 4096,

"maximum leaf page key size" : 2867,

"maximum leaf page size" : 32768,

"maximum leaf page value size" : 67108864,

"overflow pages" : 0,

"row-store internal pages" : 0,

"row-store leaf pages" : 0

},

"cache" : {

"bytes read into cache" : 0,

"bytes written from cache" : 3024994726,

"checkpoint blocked page eviction" : 0,

"unmodified pages evicted" : 0,

"page split during eviction deepened the tree" : 0,

"modified pages evicted" : 0,

"data source pages selected for eviction unable to be evicted" : 0,

"hazard pointer blocked page eviction" : 0,

"internal pages evicted" : 0,

"internal pages split during eviction" : 0,

"leaf pages split during eviction" : 0,

"in-memory page splits" : 390,

"in-memory page passed criteria to be split" : 780,

"overflow values cached in memory" : 0,

"pages read into cache" : 0,

"pages read into cache requiring lookaside entries" : 0,

"overflow pages read into cache" : 0,

"pages written from cache" : 107905,

"page written requiring lookaside records" : 0,

"pages written requiring in-memory restoration" : 0

},

"compression" : {

"raw compression call failed, no additional data available" : 0,

"raw compression call failed, additional data available" : 0,

"raw compression call succeeded" : 0,

"compressed pages read" : 0,

"compressed pages written" : 107377,

"page written failed to compress" : 0,

"page written was too small to compress" : 528

},

"cursor" : {

"create calls" : 2,

"insert calls" : 3883160,

"bulk-loaded cursor-insert calls" : 0,

"cursor-insert key and value bytes inserted" : 3003220402,

"next calls" : 0,

"prev calls" : 1,

"remove calls" : 0,

"cursor-remove key bytes removed" : 0,

"reset calls" : 3883161,

"restarted searches" : 0,

"search calls" : 0,

"search near calls" : 0,

"truncate calls" : 0,

"update calls" : 0,

"cursor-update value bytes updated" : 0

},

"reconciliation" : {

"dictionary matches" : 0,

"internal page multi-block writes" : 5,

"leaf page multi-block writes" : 393,

"maximum blocks required for a page" : 480,

"internal-page overflow keys" : 0,

"leaf-page overflow keys" : 0,

"overflow values written" : 0,

"pages deleted" : 0,

"fast-path pages deleted" : 0,

"page checksum matches" : 547,

"page reconciliation calls" : 401,

"page reconciliation calls for eviction" : 0,

"leaf page key bytes discarded using prefix compression" : 0,

"internal page key bytes discarded using suffix compression" : 107640

},

"session" : {

"object compaction" : 0,

"open cursor count" : 2

},

"transaction" : {

"update conflicts" : 0

}

},

"nindexes" : 1,

"totalIndexSize" : 35176448,

"indexSizes" : {

"_id_" : 35176448

},

"ok" : 1

}

 

Related posts:

Comments are closed.